[ARM] Remove deprecated mwords-little-endian.
gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
70 void (*arm_lang_output_object_attributes_hook)(void);
72 struct four_ints
74 int i[4];
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static int thumb_far_jump_used_p (void);
96 static bool thumb_force_lr_save (void);
97 static unsigned arm_size_return_regs (void);
98 static bool arm_assemble_integer (rtx, unsigned int, int);
99 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
100 static void arm_print_operand (FILE *, rtx, int);
101 static void arm_print_operand_address (FILE *, rtx);
102 static bool arm_print_operand_punct_valid_p (unsigned char code);
103 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
104 static arm_cc get_arm_condition_code (rtx);
105 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
106 static const char *output_multi_immediate (rtx *, const char *, const char *,
107 int, HOST_WIDE_INT);
108 static const char *shift_op (rtx, HOST_WIDE_INT *);
109 static struct machine_function *arm_init_machine_status (void);
110 static void thumb_exit (FILE *, int);
111 static HOST_WIDE_INT get_jump_table_size (rtx);
112 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
113 static Mnode *add_minipool_forward_ref (Mfix *);
114 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_backward_ref (Mfix *);
116 static void assign_minipool_offsets (Mfix *);
117 static void arm_print_value (FILE *, rtx);
118 static void dump_minipool (rtx);
119 static int arm_barrier_cost (rtx);
120 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
121 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
122 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
123 rtx);
124 static void arm_reorg (void);
125 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
126 static unsigned long arm_compute_save_reg0_reg12_mask (void);
127 static unsigned long arm_compute_save_reg_mask (void);
128 static unsigned long arm_isr_value (tree);
129 static unsigned long arm_compute_func_type (void);
130 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
131 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
134 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
135 #endif
136 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
137 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
138 static int arm_comp_type_attributes (const_tree, const_tree);
139 static void arm_set_default_type_attributes (tree);
140 static int arm_adjust_cost (rtx, rtx, rtx, int);
141 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
142 static int optimal_immediate_sequence (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence);
145 static int optimal_immediate_sequence_1 (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence,
148 int i);
149 static int arm_get_strip_length (int);
150 static bool arm_function_ok_for_sibcall (tree, tree);
151 static enum machine_mode arm_promote_function_mode (const_tree,
152 enum machine_mode, int *,
153 const_tree, int);
154 static bool arm_return_in_memory (const_tree, const_tree);
155 static rtx arm_function_value (const_tree, const_tree, bool);
156 static rtx arm_libcall_value_1 (enum machine_mode);
157 static rtx arm_libcall_value (enum machine_mode, const_rtx);
158 static bool arm_function_value_regno_p (const unsigned int);
159 static void arm_internal_label (FILE *, const char *, unsigned long);
160 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
161 tree);
162 static bool arm_have_conditional_execution (void);
163 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
164 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
165 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
166 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
167 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
172 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
175 static void arm_init_builtins (void);
176 static void arm_init_iwmmxt_builtins (void);
177 static rtx safe_vector_operand (rtx, enum machine_mode);
178 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
179 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
180 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
181 static tree arm_builtin_decl (unsigned, bool);
182 static void emit_constant_insn (rtx cond, rtx pattern);
183 static rtx emit_set_insn (rtx, rtx);
184 static rtx emit_multi_reg_push (unsigned long, unsigned long);
185 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
186 tree, bool);
187 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
188 const_tree, bool);
189 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
190 const_tree, bool);
191 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
192 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
193 const_tree);
194 static rtx aapcs_libcall_value (enum machine_mode);
195 static int aapcs_select_return_coproc (const_tree, const_tree);
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
199 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
200 #endif
201 #ifndef ARM_PE
202 static void arm_encode_section_info (tree, rtx, int);
203 #endif
205 static void arm_file_end (void);
206 static void arm_file_start (void);
208 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 enum machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 static void arm_asm_init_sections (void);
223 #endif
224 static rtx arm_dwarf_register_span (rtx);
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
241 static bool arm_cannot_copy_insn_p (rtx);
242 static int arm_issue_rate (void);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static const char *arm_invalid_parameter_type (const_tree t);
248 static const char *arm_invalid_return_type (const_tree t);
249 static tree arm_promoted_type (const_tree t);
250 static tree arm_convert_to_type (tree type, tree expr);
251 static bool arm_scalar_mode_supported_p (enum machine_mode);
252 static bool arm_frame_pointer_required (void);
253 static bool arm_can_eliminate (const int, const int);
254 static void arm_asm_trampoline_template (FILE *);
255 static void arm_trampoline_init (rtx, tree, rtx);
256 static rtx arm_trampoline_adjust_address (rtx);
257 static rtx arm_pic_static_addr (rtx orig, rtx reg);
258 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
259 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
260 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
261 static bool arm_array_mode_supported_p (enum machine_mode,
262 unsigned HOST_WIDE_INT);
263 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
264 static bool arm_class_likely_spilled_p (reg_class_t);
265 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
266 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
267 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
268 const_tree type,
269 int misalignment,
270 bool is_packed);
271 static void arm_conditional_register_usage (void);
272 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
273 static unsigned int arm_autovectorize_vector_sizes (void);
274 static int arm_default_branch_cost (bool, bool);
275 static int arm_cortex_a5_branch_cost (bool, bool);
276 static int arm_cortex_m_branch_cost (bool, bool);
278 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
279 const unsigned char *sel);
281 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
282 tree vectype,
283 int misalign ATTRIBUTE_UNUSED);
284 static unsigned arm_add_stmt_cost (void *data, int count,
285 enum vect_cost_for_stmt kind,
286 struct _stmt_vec_info *stmt_info,
287 int misalign,
288 enum vect_cost_model_location where);
290 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
291 bool op0_preserve_value);
292 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
294 /* Table of machine attributes. */
295 static const struct attribute_spec arm_attribute_table[] =
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
298 affects_type_identity } */
299 /* Function calls made to this symbol must be done indirectly, because
300 it may lie outside the 26-bit addressing range of a normal function
301 call. */
302 { "long_call", 0, 0, false, true, true, NULL, false },
303 /* Whereas these functions are always known to reside within the 26-bit
304 addressing range. */
305 { "short_call", 0, 0, false, true, true, NULL, false },
306 /* Specify the procedure call conventions for a function. */
307 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
308 false },
309 /* Interrupt Service Routines have special prologue and epilogue requirements. */
310 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
311 false },
312 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
313 false },
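/* For example (illustrative only):
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   The optional argument ("IRQ", "FIQ", "SWI", "ABORT" or "UNDEF")
   selects the kind of handler and hence the prologue/epilogue
   sequence that is generated.  */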
314 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
315 false },
316 #ifdef ARM_PE
317 /* ARM/PE has three new attributes:
318 interfacearm - ?
319 dllexport - for exporting a function/variable that will live in a dll
320 dllimport - for importing a function/variable from a dll
322 Microsoft allows multiple declspecs in one __declspec, separating
323 them with spaces. We do NOT support this. Instead, use __declspec
324 multiple times. */
326 { "dllimport", 0, 0, true, false, false, NULL, false },
327 { "dllexport", 0, 0, true, false, false, NULL, false },
328 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
329 false },
330 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
332 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
334 false },
335 #endif
336 { NULL, 0, 0, false, false, false, NULL, false }
339 /* Initialize the GCC target structure. */
340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 #undef TARGET_MERGE_DECL_ATTRIBUTES
342 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
343 #endif
345 #undef TARGET_LEGITIMIZE_ADDRESS
346 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
348 #undef TARGET_LRA_P
349 #define TARGET_LRA_P arm_lra_p
351 #undef TARGET_ATTRIBUTE_TABLE
352 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
354 #undef TARGET_ASM_FILE_START
355 #define TARGET_ASM_FILE_START arm_file_start
356 #undef TARGET_ASM_FILE_END
357 #define TARGET_ASM_FILE_END arm_file_end
359 #undef TARGET_ASM_ALIGNED_SI_OP
360 #define TARGET_ASM_ALIGNED_SI_OP NULL
361 #undef TARGET_ASM_INTEGER
362 #define TARGET_ASM_INTEGER arm_assemble_integer
364 #undef TARGET_PRINT_OPERAND
365 #define TARGET_PRINT_OPERAND arm_print_operand
366 #undef TARGET_PRINT_OPERAND_ADDRESS
367 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
368 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
369 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
374 #undef TARGET_ASM_FUNCTION_PROLOGUE
375 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
377 #undef TARGET_ASM_FUNCTION_EPILOGUE
378 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
380 #undef TARGET_OPTION_OVERRIDE
381 #define TARGET_OPTION_OVERRIDE arm_option_override
383 #undef TARGET_COMP_TYPE_ATTRIBUTES
384 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
386 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
387 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER arm_sched_reorder
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
398 #undef TARGET_MEMORY_MOVE_COST
399 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
401 #undef TARGET_ENCODE_SECTION_INFO
402 #ifdef ARM_PE
403 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
404 #else
405 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
406 #endif
408 #undef TARGET_STRIP_NAME_ENCODING
409 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
411 #undef TARGET_ASM_INTERNAL_LABEL
412 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
414 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
415 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
417 #undef TARGET_FUNCTION_VALUE
418 #define TARGET_FUNCTION_VALUE arm_function_value
420 #undef TARGET_LIBCALL_VALUE
421 #define TARGET_LIBCALL_VALUE arm_libcall_value
423 #undef TARGET_FUNCTION_VALUE_REGNO_P
424 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
426 #undef TARGET_ASM_OUTPUT_MI_THUNK
427 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
431 #undef TARGET_RTX_COSTS
432 #define TARGET_RTX_COSTS arm_rtx_costs
433 #undef TARGET_ADDRESS_COST
434 #define TARGET_ADDRESS_COST arm_address_cost
436 #undef TARGET_SHIFT_TRUNCATION_MASK
437 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
439 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
440 #undef TARGET_ARRAY_MODE_SUPPORTED_P
441 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
446 arm_autovectorize_vector_sizes
448 #undef TARGET_MACHINE_DEPENDENT_REORG
449 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
451 #undef TARGET_INIT_BUILTINS
452 #define TARGET_INIT_BUILTINS arm_init_builtins
453 #undef TARGET_EXPAND_BUILTIN
454 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
455 #undef TARGET_BUILTIN_DECL
456 #define TARGET_BUILTIN_DECL arm_builtin_decl
458 #undef TARGET_INIT_LIBFUNCS
459 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
461 #undef TARGET_PROMOTE_FUNCTION_MODE
462 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
463 #undef TARGET_PROMOTE_PROTOTYPES
464 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
465 #undef TARGET_PASS_BY_REFERENCE
466 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
469 #undef TARGET_FUNCTION_ARG
470 #define TARGET_FUNCTION_ARG arm_function_arg
471 #undef TARGET_FUNCTION_ARG_ADVANCE
472 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
473 #undef TARGET_FUNCTION_ARG_BOUNDARY
474 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
476 #undef TARGET_SETUP_INCOMING_VARARGS
477 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
479 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
480 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
482 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
483 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
484 #undef TARGET_TRAMPOLINE_INIT
485 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
486 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
487 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
489 #undef TARGET_WARN_FUNC_RETURN
490 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
492 #undef TARGET_DEFAULT_SHORT_ENUMS
493 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
495 #undef TARGET_ALIGN_ANON_BITFIELD
496 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
498 #undef TARGET_NARROW_VOLATILE_BITFIELD
499 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
501 #undef TARGET_CXX_GUARD_TYPE
502 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
504 #undef TARGET_CXX_GUARD_MASK_BIT
505 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
507 #undef TARGET_CXX_GET_COOKIE_SIZE
508 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
510 #undef TARGET_CXX_COOKIE_HAS_SIZE
511 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
513 #undef TARGET_CXX_CDTOR_RETURNS_THIS
514 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
516 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
517 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
519 #undef TARGET_CXX_USE_AEABI_ATEXIT
520 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
522 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
523 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
524 arm_cxx_determine_class_data_visibility
526 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
527 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
529 #undef TARGET_RETURN_IN_MSB
530 #define TARGET_RETURN_IN_MSB arm_return_in_msb
532 #undef TARGET_RETURN_IN_MEMORY
533 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
538 #if ARM_UNWIND_INFO
539 #undef TARGET_ASM_UNWIND_EMIT
540 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
542 /* EABI unwinding tables use a different format for the typeinfo tables. */
543 #undef TARGET_ASM_TTYPE
544 #define TARGET_ASM_TTYPE arm_output_ttype
546 #undef TARGET_ARM_EABI_UNWINDER
547 #define TARGET_ARM_EABI_UNWINDER true
549 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
550 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
552 #undef TARGET_ASM_INIT_SECTIONS
553 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
554 #endif /* ARM_UNWIND_INFO */
556 #undef TARGET_DWARF_REGISTER_SPAN
557 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
559 #undef TARGET_CANNOT_COPY_INSN_P
560 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
562 #ifdef HAVE_AS_TLS
563 #undef TARGET_HAVE_TLS
564 #define TARGET_HAVE_TLS true
565 #endif
567 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
568 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
570 #undef TARGET_LEGITIMATE_CONSTANT_P
571 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
573 #undef TARGET_CANNOT_FORCE_CONST_MEM
574 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
576 #undef TARGET_MAX_ANCHOR_OFFSET
577 #define TARGET_MAX_ANCHOR_OFFSET 4095
579 /* The minimum is set such that the total size of the block
580 for a particular anchor is -4088 + 1 + 4095 bytes, which is
581 divisible by eight, ensuring natural spacing of anchors. */
582 #undef TARGET_MIN_ANCHOR_OFFSET
583 #define TARGET_MIN_ANCHOR_OFFSET -4088
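/* In other words, anchored offsets span [-4088, +4095], i.e.
   4088 + 1 + 4095 = 8184 bytes, which is a multiple of eight.  */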
585 #undef TARGET_SCHED_ISSUE_RATE
586 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
588 #undef TARGET_MANGLE_TYPE
589 #define TARGET_MANGLE_TYPE arm_mangle_type
591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
594 #undef TARGET_BUILD_BUILTIN_VA_LIST
595 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
596 #undef TARGET_EXPAND_BUILTIN_VA_START
597 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
601 #ifdef HAVE_AS_TLS
602 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
603 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
604 #endif
606 #undef TARGET_LEGITIMATE_ADDRESS_P
607 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
609 #undef TARGET_PREFERRED_RELOAD_CLASS
610 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
612 #undef TARGET_INVALID_PARAMETER_TYPE
613 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
615 #undef TARGET_INVALID_RETURN_TYPE
616 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
618 #undef TARGET_PROMOTED_TYPE
619 #define TARGET_PROMOTED_TYPE arm_promoted_type
621 #undef TARGET_CONVERT_TO_TYPE
622 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
627 #undef TARGET_FRAME_POINTER_REQUIRED
628 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE arm_can_eliminate
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
636 #undef TARGET_CLASS_LIKELY_SPILLED_P
637 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
639 #undef TARGET_VECTORIZE_BUILTINS
640 #define TARGET_VECTORIZE_BUILTINS
642 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
643 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
644 arm_builtin_vectorized_function
646 #undef TARGET_VECTOR_ALIGNMENT
647 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
649 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
650 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
651 arm_vector_alignment_reachable
653 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
654 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
655 arm_builtin_support_vector_misalignment
657 #undef TARGET_PREFERRED_RENAME_CLASS
658 #define TARGET_PREFERRED_RENAME_CLASS \
659 arm_preferred_rename_class
661 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
662 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
663 arm_vectorize_vec_perm_const_ok
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
667 arm_builtin_vectorization_cost
668 #undef TARGET_VECTORIZE_ADD_STMT_COST
669 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
671 #undef TARGET_CANONICALIZE_COMPARISON
672 #define TARGET_CANONICALIZE_COMPARISON \
673 arm_canonicalize_comparison
675 #undef TARGET_ASAN_SHADOW_OFFSET
676 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
678 #undef MAX_INSN_PER_IT_BLOCK
679 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
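/* Under -mrestrict-it (ARMv8 rules) an IT block may cover only a single
   instruction; otherwise Thumb-2 allows up to four.  */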
681 #undef TARGET_CAN_USE_DOLOOP_P
682 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
684 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
685 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
687 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
688 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
690 struct gcc_target targetm = TARGET_INITIALIZER;
692 /* Obstack for minipool constant handling. */
693 static struct obstack minipool_obstack;
694 static char * minipool_startobj;
696 /* The maximum number of insns skipped which
697 will be conditionalised if possible. */
698 static int max_insns_skipped = 5;
700 extern FILE * asm_out_file;
702 /* True if we are currently building a constant table. */
703 int making_const_table;
705 /* The processor for which instructions should be scheduled. */
706 enum processor_type arm_tune = arm_none;
708 /* The current tuning set. */
709 const struct tune_params *current_tune;
711 /* Which floating point hardware to schedule for. */
712 int arm_fpu_attr;
715 /* Which floating point hardware to use. */
715 const struct arm_fpu_desc *arm_fpu_desc;
717 /* Used for Thumb call_via trampolines. */
718 rtx thumb_call_via_label[14];
719 static int thumb_call_reg_needed;
721 /* Bit values used to identify processor capabilities. */
722 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
723 #define FL_ARCH3M (1 << 1) /* Extended multiply */
724 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
725 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
726 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
727 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
728 #define FL_THUMB (1 << 6) /* Thumb aware */
729 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
730 #define FL_STRONG (1 << 8) /* StrongARM */
731 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
732 #define FL_XSCALE (1 << 10) /* XScale */
733 /* spare (1 << 11) */
734 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
735 media instructions. */
736 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
737 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
738 Note: ARM6 & 7 derivatives only. */
739 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
740 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
741 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
742 profile. */
743 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
744 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
745 #define FL_NEON (1 << 20) /* Neon instructions. */
746 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
747 architecture. */
748 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
749 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
750 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
751 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
753 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
754 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
756 /* Flags that only affect tuning, not available instructions. */
757 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
758 | FL_CO_PROC)
760 #define FL_FOR_ARCH2 FL_NOTM
761 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
762 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
763 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
764 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
765 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
766 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
767 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
768 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
769 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
770 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
771 #define FL_FOR_ARCH6J FL_FOR_ARCH6
772 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
773 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
774 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
775 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
776 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
777 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
778 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
779 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
780 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
781 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
782 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
783 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
785 /* The bits in this mask specify which
786 instructions we are allowed to generate. */
787 static unsigned long insn_flags = 0;
789 /* The bits in this mask specify which instruction scheduling options should
790 be used. */
791 static unsigned long tune_flags = 0;
793 /* The highest ARM architecture version supported by the
794 target. */
795 enum base_architecture arm_base_arch = BASE_ARCH_0;
797 /* The following are used in the arm.md file as equivalents to bits
798 in the above two flag variables. */
800 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
801 int arm_arch3m = 0;
803 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
804 int arm_arch4 = 0;
806 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
807 int arm_arch4t = 0;
809 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
810 int arm_arch5 = 0;
812 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
813 int arm_arch5e = 0;
815 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
816 int arm_arch6 = 0;
818 /* Nonzero if this chip supports the ARM 6K extensions. */
819 int arm_arch6k = 0;
821 /* Nonzero if instructions present in ARMv6-M can be used. */
822 int arm_arch6m = 0;
824 /* Nonzero if this chip supports the ARM 7 extensions. */
825 int arm_arch7 = 0;
827 /* Nonzero if instructions not present in the 'M' profile can be used. */
828 int arm_arch_notm = 0;
830 /* Nonzero if instructions present in ARMv7E-M can be used. */
831 int arm_arch7em = 0;
833 /* Nonzero if instructions present in ARMv8 can be used. */
834 int arm_arch8 = 0;
836 /* Nonzero if this chip can benefit from load scheduling. */
837 int arm_ld_sched = 0;
839 /* Nonzero if this chip is a StrongARM. */
840 int arm_tune_strongarm = 0;
842 /* Nonzero if this chip supports Intel Wireless MMX technology. */
843 int arm_arch_iwmmxt = 0;
845 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
846 int arm_arch_iwmmxt2 = 0;
848 /* Nonzero if this chip is an XScale. */
849 int arm_arch_xscale = 0;
851 /* Nonzero if tuning for XScale */
852 int arm_tune_xscale = 0;
854 /* Nonzero if we want to tune for stores that access the write-buffer.
855 This typically means an ARM6 or ARM7 with MMU or MPU. */
856 int arm_tune_wbuf = 0;
858 /* Nonzero if tuning for Cortex-A9. */
859 int arm_tune_cortex_a9 = 0;
861 /* Nonzero if generating Thumb instructions. */
862 int thumb_code = 0;
864 /* Nonzero if generating Thumb-1 instructions. */
865 int thumb1_code = 0;
867 /* Nonzero if we should define __THUMB_INTERWORK__ in the
868 preprocessor.
869 XXX This is a bit of a hack; it's intended to help work around
870 problems in GLD which doesn't understand that armv5t code is
871 interworking clean. */
872 int arm_cpp_interwork = 0;
874 /* Nonzero if chip supports Thumb 2. */
875 int arm_arch_thumb2;
877 /* Nonzero if chip supports integer division instruction. */
878 int arm_arch_arm_hwdiv;
879 int arm_arch_thumb_hwdiv;
881 /* Nonzero if we should use Neon to handle 64-bit operations rather
882 than core registers. */
883 int prefer_neon_for_64bits = 0;
885 /* Nonzero if we shouldn't use literal pools. */
886 bool arm_disable_literal_pool = false;
888 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
889 we must report the mode of the memory reference from
890 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
891 enum machine_mode output_memory_reference_mode;
893 /* The register number to be used for the PIC offset register. */
894 unsigned arm_pic_register = INVALID_REGNUM;
896 enum arm_pcs arm_pcs_default;
898 /* For an explanation of these variables, see final_prescan_insn below. */
899 int arm_ccfsm_state;
900 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
901 enum arm_cond_code arm_current_cc;
903 rtx arm_target_insn;
904 int arm_target_label;
905 /* The number of conditionally executed insns, including the current insn. */
906 int arm_condexec_count = 0;
907 /* A bitmask specifying the patterns for the IT block.
908 Zero means do not output an IT block before this insn. */
909 int arm_condexec_mask = 0;
910 /* The number of bits used in arm_condexec_mask. */
911 int arm_condexec_masklen = 0;
913 /* Nonzero if chip supports the ARMv8 CRC instructions. */
914 int arm_arch_crc = 0;
916 /* The condition codes of the ARM, and the inverse function. */
917 static const char * const arm_condition_codes[] =
919 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
920 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
923 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
924 int arm_regs_in_sequence[] =
926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
929 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
930 #define streq(string1, string2) (strcmp (string1, string2) == 0)
932 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
933 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
934 | (1 << PIC_OFFSET_TABLE_REGNUM)))
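/* I.e. the candidate work registers are the low registers r0-r7,
   excluding the Thumb hard frame pointer, SP, PC and the PIC base
   register.  */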
936 /* Initialization code. */
938 struct processors
940 const char *const name;
941 enum processor_type core;
942 const char *arch;
943 enum base_architecture base_arch;
944 const unsigned long flags;
945 const struct tune_params *const tune;
949 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
950 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
951 prefetch_slots, \
952 l1_size, \
953 l1_line_size
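/* For example, a core description could use
     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   to fill the prefetch-slot, L1 cache size and L1 line size tuning
   fields, while ARM_PREFETCH_NOT_BENEFICIAL expands to 0, -1, -1.
   (The values above are illustrative, not taken from this file.)  */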
955 /* arm generic vectorizer costs. */
956 static const
957 struct cpu_vec_costs arm_default_vec_cost = {
958 1, /* scalar_stmt_cost. */
959 1, /* scalar_load_cost. */
960 1, /* scalar_store_cost. */
961 1, /* vec_stmt_cost. */
962 1, /* vec_to_scalar_cost. */
963 1, /* scalar_to_vec_cost. */
964 1, /* vec_align_load_cost. */
965 1, /* vec_unalign_load_cost. */
966 1, /* vec_unalign_store_cost. */
967 1, /* vec_store_cost. */
968 3, /* cond_taken_branch_cost. */
969 1, /* cond_not_taken_branch_cost. */
972 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
973 #include "aarch-cost-tables.h"
977 const struct cpu_cost_table cortexa9_extra_costs =
979 /* ALU */
981 0, /* arith. */
982 0, /* logical. */
983 0, /* shift. */
984 COSTS_N_INSNS (1), /* shift_reg. */
985 COSTS_N_INSNS (1), /* arith_shift. */
986 COSTS_N_INSNS (2), /* arith_shift_reg. */
987 0, /* log_shift. */
988 COSTS_N_INSNS (1), /* log_shift_reg. */
989 COSTS_N_INSNS (1), /* extend. */
990 COSTS_N_INSNS (2), /* extend_arith. */
991 COSTS_N_INSNS (1), /* bfi. */
992 COSTS_N_INSNS (1), /* bfx. */
993 0, /* clz. */
994 0, /* rev. */
995 0, /* non_exec. */
996 true /* non_exec_costs_exec. */
999 /* MULT SImode */
1001 COSTS_N_INSNS (3), /* simple. */
1002 COSTS_N_INSNS (3), /* flag_setting. */
1003 COSTS_N_INSNS (2), /* extend. */
1004 COSTS_N_INSNS (3), /* add. */
1005 COSTS_N_INSNS (2), /* extend_add. */
1006 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1008 /* MULT DImode */
1010 0, /* simple (N/A). */
1011 0, /* flag_setting (N/A). */
1012 COSTS_N_INSNS (4), /* extend. */
1013 0, /* add (N/A). */
1014 COSTS_N_INSNS (4), /* extend_add. */
1015 0 /* idiv (N/A). */
1018 /* LD/ST */
1020 COSTS_N_INSNS (2), /* load. */
1021 COSTS_N_INSNS (2), /* load_sign_extend. */
1022 COSTS_N_INSNS (2), /* ldrd. */
1023 COSTS_N_INSNS (2), /* ldm_1st. */
1024 1, /* ldm_regs_per_insn_1st. */
1025 2, /* ldm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (5), /* loadf. */
1027 COSTS_N_INSNS (5), /* loadd. */
1028 COSTS_N_INSNS (1), /* load_unaligned. */
1029 COSTS_N_INSNS (2), /* store. */
1030 COSTS_N_INSNS (2), /* strd. */
1031 COSTS_N_INSNS (2), /* stm_1st. */
1032 1, /* stm_regs_per_insn_1st. */
1033 2, /* stm_regs_per_insn_subsequent. */
1034 COSTS_N_INSNS (1), /* storef. */
1035 COSTS_N_INSNS (1), /* stored. */
1036 COSTS_N_INSNS (1) /* store_unaligned. */
1039 /* FP SFmode */
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1055 /* FP DFmode */
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1072 /* Vector */
1074 COSTS_N_INSNS (1) /* alu. */
1078 const struct cpu_cost_table cortexa8_extra_costs =
1080 /* ALU */
1082 0, /* arith. */
1083 0, /* logical. */
1084 COSTS_N_INSNS (1), /* shift. */
1085 0, /* shift_reg. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1090 0, /* extend. */
1091 0, /* extend_arith. */
1092 0, /* bfi. */
1093 0, /* bfx. */
1094 0, /* clz. */
1095 0, /* rev. */
1096 0, /* non_exec. */
1097 true /* non_exec_costs_exec. */
1100 /* MULT SImode */
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1109 /* MULT DImode */
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1114 0, /* add (N/A). */
1115 COSTS_N_INSNS (2), /* extend_add. */
1116 0 /* idiv (N/A). */
1119 /* LD/ST */
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1) /* store_unaligned. */
1140 /* FP SFmode */
1142 COSTS_N_INSNS (36), /* div. */
1143 COSTS_N_INSNS (11), /* mult. */
1144 COSTS_N_INSNS (20), /* mult_addsub. */
1145 COSTS_N_INSNS (30), /* fma. */
1146 COSTS_N_INSNS (9), /* addsub. */
1147 COSTS_N_INSNS (3), /* fpconst. */
1148 COSTS_N_INSNS (3), /* neg. */
1149 COSTS_N_INSNS (6), /* compare. */
1150 COSTS_N_INSNS (4), /* widen. */
1151 COSTS_N_INSNS (4), /* narrow. */
1152 COSTS_N_INSNS (8), /* toint. */
1153 COSTS_N_INSNS (8), /* fromint. */
1154 COSTS_N_INSNS (8) /* roundint. */
1156 /* FP DFmode */
1158 COSTS_N_INSNS (64), /* div. */
1159 COSTS_N_INSNS (16), /* mult. */
1160 COSTS_N_INSNS (25), /* mult_addsub. */
1161 COSTS_N_INSNS (30), /* fma. */
1162 COSTS_N_INSNS (9), /* addsub. */
1163 COSTS_N_INSNS (3), /* fpconst. */
1164 COSTS_N_INSNS (3), /* neg. */
1165 COSTS_N_INSNS (6), /* compare. */
1166 COSTS_N_INSNS (6), /* widen. */
1167 COSTS_N_INSNS (6), /* narrow. */
1168 COSTS_N_INSNS (8), /* toint. */
1169 COSTS_N_INSNS (8), /* fromint. */
1170 COSTS_N_INSNS (8) /* roundint. */
1173 /* Vector */
1175 COSTS_N_INSNS (1) /* alu. */
1181 const struct cpu_cost_table cortexa7_extra_costs =
1183 /* ALU */
1185 0, /* arith. */
1186 0, /* logical. */
1187 COSTS_N_INSNS (1), /* shift. */
1188 COSTS_N_INSNS (1), /* shift_reg. */
1189 COSTS_N_INSNS (1), /* arith_shift. */
1190 COSTS_N_INSNS (1), /* arith_shift_reg. */
1191 COSTS_N_INSNS (1), /* log_shift. */
1192 COSTS_N_INSNS (1), /* log_shift_reg. */
1193 COSTS_N_INSNS (1), /* extend. */
1194 COSTS_N_INSNS (1), /* extend_arith. */
1195 COSTS_N_INSNS (1), /* bfi. */
1196 COSTS_N_INSNS (1), /* bfx. */
1197 COSTS_N_INSNS (1), /* clz. */
1198 COSTS_N_INSNS (1), /* rev. */
1199 0, /* non_exec. */
1200 true /* non_exec_costs_exec. */
1204 /* MULT SImode */
1206 0, /* simple. */
1207 COSTS_N_INSNS (1), /* flag_setting. */
1208 COSTS_N_INSNS (1), /* extend. */
1209 COSTS_N_INSNS (1), /* add. */
1210 COSTS_N_INSNS (1), /* extend_add. */
1211 COSTS_N_INSNS (7) /* idiv. */
1213 /* MULT DImode */
1215 0, /* simple (N/A). */
1216 0, /* flag_setting (N/A). */
1217 COSTS_N_INSNS (1), /* extend. */
1218 0, /* add. */
1219 COSTS_N_INSNS (2), /* extend_add. */
1220 0 /* idiv (N/A). */
1223 /* LD/ST */
1225 COSTS_N_INSNS (1), /* load. */
1226 COSTS_N_INSNS (1), /* load_sign_extend. */
1227 COSTS_N_INSNS (3), /* ldrd. */
1228 COSTS_N_INSNS (1), /* ldm_1st. */
1229 1, /* ldm_regs_per_insn_1st. */
1230 2, /* ldm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* loadf. */
1232 COSTS_N_INSNS (2), /* loadd. */
1233 COSTS_N_INSNS (1), /* load_unaligned. */
1234 COSTS_N_INSNS (1), /* store. */
1235 COSTS_N_INSNS (3), /* strd. */
1236 COSTS_N_INSNS (1), /* stm_1st. */
1237 1, /* stm_regs_per_insn_1st. */
1238 2, /* stm_regs_per_insn_subsequent. */
1239 COSTS_N_INSNS (2), /* storef. */
1240 COSTS_N_INSNS (2), /* stored. */
1241 COSTS_N_INSNS (1) /* store_unaligned. */
1244 /* FP SFmode */
1246 COSTS_N_INSNS (15), /* div. */
1247 COSTS_N_INSNS (3), /* mult. */
1248 COSTS_N_INSNS (7), /* mult_addsub. */
1249 COSTS_N_INSNS (7), /* fma. */
1250 COSTS_N_INSNS (3), /* addsub. */
1251 COSTS_N_INSNS (3), /* fpconst. */
1252 COSTS_N_INSNS (3), /* neg. */
1253 COSTS_N_INSNS (3), /* compare. */
1254 COSTS_N_INSNS (3), /* widen. */
1255 COSTS_N_INSNS (3), /* narrow. */
1256 COSTS_N_INSNS (3), /* toint. */
1257 COSTS_N_INSNS (3), /* fromint. */
1258 COSTS_N_INSNS (3) /* roundint. */
1260 /* FP DFmode */
1262 COSTS_N_INSNS (30), /* div. */
1263 COSTS_N_INSNS (6), /* mult. */
1264 COSTS_N_INSNS (10), /* mult_addsub. */
1265 COSTS_N_INSNS (7), /* fma. */
1266 COSTS_N_INSNS (3), /* addsub. */
1267 COSTS_N_INSNS (3), /* fpconst. */
1268 COSTS_N_INSNS (3), /* neg. */
1269 COSTS_N_INSNS (3), /* compare. */
1270 COSTS_N_INSNS (3), /* widen. */
1271 COSTS_N_INSNS (3), /* narrow. */
1272 COSTS_N_INSNS (3), /* toint. */
1273 COSTS_N_INSNS (3), /* fromint. */
1274 COSTS_N_INSNS (3) /* roundint. */
1277 /* Vector */
1279 COSTS_N_INSNS (1) /* alu. */
1283 const struct cpu_cost_table cortexa12_extra_costs =
1285 /* ALU */
1287 0, /* arith. */
1288 0, /* logical. */
1289 0, /* shift. */
1290 COSTS_N_INSNS (1), /* shift_reg. */
1291 COSTS_N_INSNS (1), /* arith_shift. */
1292 COSTS_N_INSNS (1), /* arith_shift_reg. */
1293 COSTS_N_INSNS (1), /* log_shift. */
1294 COSTS_N_INSNS (1), /* log_shift_reg. */
1295 0, /* extend. */
1296 COSTS_N_INSNS (1), /* extend_arith. */
1297 0, /* bfi. */
1298 COSTS_N_INSNS (1), /* bfx. */
1299 COSTS_N_INSNS (1), /* clz. */
1300 COSTS_N_INSNS (1), /* rev. */
1301 0, /* non_exec. */
1302 true /* non_exec_costs_exec. */
1304 /* MULT SImode */
1307 COSTS_N_INSNS (2), /* simple. */
1308 COSTS_N_INSNS (3), /* flag_setting. */
1309 COSTS_N_INSNS (2), /* extend. */
1310 COSTS_N_INSNS (3), /* add. */
1311 COSTS_N_INSNS (2), /* extend_add. */
1312 COSTS_N_INSNS (18) /* idiv. */
1314 /* MULT DImode */
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (3), /* extend. */
1319 0, /* add (N/A). */
1320 COSTS_N_INSNS (3), /* extend_add. */
1321 0 /* idiv (N/A). */
1324 /* LD/ST */
1326 COSTS_N_INSNS (3), /* load. */
1327 COSTS_N_INSNS (3), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (3), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (3), /* loadf. */
1333 COSTS_N_INSNS (3), /* loadd. */
1334 0, /* load_unaligned. */
1335 0, /* store. */
1336 0, /* strd. */
1337 0, /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 0 /* store_unaligned. */
1345 /* FP SFmode */
1347 COSTS_N_INSNS (17), /* div. */
1348 COSTS_N_INSNS (4), /* mult. */
1349 COSTS_N_INSNS (8), /* mult_addsub. */
1350 COSTS_N_INSNS (8), /* fma. */
1351 COSTS_N_INSNS (4), /* addsub. */
1352 COSTS_N_INSNS (2), /* fpconst. */
1353 COSTS_N_INSNS (2), /* neg. */
1354 COSTS_N_INSNS (2), /* compare. */
1355 COSTS_N_INSNS (4), /* widen. */
1356 COSTS_N_INSNS (4), /* narrow. */
1357 COSTS_N_INSNS (4), /* toint. */
1358 COSTS_N_INSNS (4), /* fromint. */
1359 COSTS_N_INSNS (4) /* roundint. */
1361 /* FP DFmode */
1363 COSTS_N_INSNS (31), /* div. */
1364 COSTS_N_INSNS (4), /* mult. */
1365 COSTS_N_INSNS (8), /* mult_addsub. */
1366 COSTS_N_INSNS (8), /* fma. */
1367 COSTS_N_INSNS (4), /* addsub. */
1368 COSTS_N_INSNS (2), /* fpconst. */
1369 COSTS_N_INSNS (2), /* neg. */
1370 COSTS_N_INSNS (2), /* compare. */
1371 COSTS_N_INSNS (4), /* widen. */
1372 COSTS_N_INSNS (4), /* narrow. */
1373 COSTS_N_INSNS (4), /* toint. */
1374 COSTS_N_INSNS (4), /* fromint. */
1375 COSTS_N_INSNS (4) /* roundint. */
1378 /* Vector */
1380 COSTS_N_INSNS (1) /* alu. */
1384 const struct cpu_cost_table cortexa15_extra_costs =
1386 /* ALU */
1388 0, /* arith. */
1389 0, /* logical. */
1390 0, /* shift. */
1391 0, /* shift_reg. */
1392 COSTS_N_INSNS (1), /* arith_shift. */
1393 COSTS_N_INSNS (1), /* arith_shift_reg. */
1394 COSTS_N_INSNS (1), /* log_shift. */
1395 COSTS_N_INSNS (1), /* log_shift_reg. */
1396 0, /* extend. */
1397 COSTS_N_INSNS (1), /* extend_arith. */
1398 COSTS_N_INSNS (1), /* bfi. */
1399 0, /* bfx. */
1400 0, /* clz. */
1401 0, /* rev. */
1402 0, /* non_exec. */
1403 true /* non_exec_costs_exec. */
1405 /* MULT SImode */
1408 COSTS_N_INSNS (2), /* simple. */
1409 COSTS_N_INSNS (3), /* flag_setting. */
1410 COSTS_N_INSNS (2), /* extend. */
1411 COSTS_N_INSNS (2), /* add. */
1412 COSTS_N_INSNS (2), /* extend_add. */
1413 COSTS_N_INSNS (18) /* idiv. */
1415 /* MULT DImode */
1417 0, /* simple (N/A). */
1418 0, /* flag_setting (N/A). */
1419 COSTS_N_INSNS (3), /* extend. */
1420 0, /* add (N/A). */
1421 COSTS_N_INSNS (3), /* extend_add. */
1422 0 /* idiv (N/A). */
1425 /* LD/ST */
1427 COSTS_N_INSNS (3), /* load. */
1428 COSTS_N_INSNS (3), /* load_sign_extend. */
1429 COSTS_N_INSNS (3), /* ldrd. */
1430 COSTS_N_INSNS (4), /* ldm_1st. */
1431 1, /* ldm_regs_per_insn_1st. */
1432 2, /* ldm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (4), /* loadf. */
1434 COSTS_N_INSNS (4), /* loadd. */
1435 0, /* load_unaligned. */
1436 0, /* store. */
1437 0, /* strd. */
1438 COSTS_N_INSNS (1), /* stm_1st. */
1439 1, /* stm_regs_per_insn_1st. */
1440 2, /* stm_regs_per_insn_subsequent. */
1441 0, /* storef. */
1442 0, /* stored. */
1443 0 /* store_unaligned. */
1446 /* FP SFmode */
1448 COSTS_N_INSNS (17), /* div. */
1449 COSTS_N_INSNS (4), /* mult. */
1450 COSTS_N_INSNS (8), /* mult_addsub. */
1451 COSTS_N_INSNS (8), /* fma. */
1452 COSTS_N_INSNS (4), /* addsub. */
1453 COSTS_N_INSNS (2), /* fpconst. */
1454 COSTS_N_INSNS (2), /* neg. */
1455 COSTS_N_INSNS (5), /* compare. */
1456 COSTS_N_INSNS (4), /* widen. */
1457 COSTS_N_INSNS (4), /* narrow. */
1458 COSTS_N_INSNS (4), /* toint. */
1459 COSTS_N_INSNS (4), /* fromint. */
1460 COSTS_N_INSNS (4) /* roundint. */
1462 /* FP DFmode */
1464 COSTS_N_INSNS (31), /* div. */
1465 COSTS_N_INSNS (4), /* mult. */
1466 COSTS_N_INSNS (8), /* mult_addsub. */
1467 COSTS_N_INSNS (8), /* fma. */
1468 COSTS_N_INSNS (4), /* addsub. */
1469 COSTS_N_INSNS (2), /* fpconst. */
1470 COSTS_N_INSNS (2), /* neg. */
1471 COSTS_N_INSNS (2), /* compare. */
1472 COSTS_N_INSNS (4), /* widen. */
1473 COSTS_N_INSNS (4), /* narrow. */
1474 COSTS_N_INSNS (4), /* toint. */
1475 COSTS_N_INSNS (4), /* fromint. */
1476 COSTS_N_INSNS (4) /* roundint. */
1479 /* Vector */
1481 COSTS_N_INSNS (1) /* alu. */
1485 const struct cpu_cost_table v7m_extra_costs =
1487 /* ALU */
1489 0, /* arith. */
1490 0, /* logical. */
1491 0, /* shift. */
1492 0, /* shift_reg. */
1493 0, /* arith_shift. */
1494 COSTS_N_INSNS (1), /* arith_shift_reg. */
1495 0, /* log_shift. */
1496 COSTS_N_INSNS (1), /* log_shift_reg. */
1497 0, /* extend. */
1498 COSTS_N_INSNS (1), /* extend_arith. */
1499 0, /* bfi. */
1500 0, /* bfx. */
1501 0, /* clz. */
1502 0, /* rev. */
1503 COSTS_N_INSNS (1), /* non_exec. */
1504 false /* non_exec_costs_exec. */
1507 /* MULT SImode */
1509 COSTS_N_INSNS (1), /* simple. */
1510 COSTS_N_INSNS (1), /* flag_setting. */
1511 COSTS_N_INSNS (2), /* extend. */
1512 COSTS_N_INSNS (1), /* add. */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 COSTS_N_INSNS (8) /* idiv. */
1516 /* MULT DImode */
1518 0, /* simple (N/A). */
1519 0, /* flag_setting (N/A). */
1520 COSTS_N_INSNS (2), /* extend. */
1521 0, /* add (N/A). */
1522 COSTS_N_INSNS (3), /* extend_add. */
1523 0 /* idiv (N/A). */
1526 /* LD/ST */
1528 COSTS_N_INSNS (2), /* load. */
1529 0, /* load_sign_extend. */
1530 COSTS_N_INSNS (3), /* ldrd. */
1531 COSTS_N_INSNS (2), /* ldm_1st. */
1532 1, /* ldm_regs_per_insn_1st. */
1533 1, /* ldm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (2), /* loadf. */
1535 COSTS_N_INSNS (3), /* loadd. */
1536 COSTS_N_INSNS (1), /* load_unaligned. */
1537 COSTS_N_INSNS (2), /* store. */
1538 COSTS_N_INSNS (3), /* strd. */
1539 COSTS_N_INSNS (2), /* stm_1st. */
1540 1, /* stm_regs_per_insn_1st. */
1541 1, /* stm_regs_per_insn_subsequent. */
1542 COSTS_N_INSNS (2), /* storef. */
1543 COSTS_N_INSNS (3), /* stored. */
1544 COSTS_N_INSNS (1) /* store_unaligned. */
1547 /* FP SFmode */
1549 COSTS_N_INSNS (7), /* div. */
1550 COSTS_N_INSNS (2), /* mult. */
1551 COSTS_N_INSNS (5), /* mult_addsub. */
1552 COSTS_N_INSNS (3), /* fma. */
1553 COSTS_N_INSNS (1), /* addsub. */
1554 0, /* fpconst. */
1555 0, /* neg. */
1556 0, /* compare. */
1557 0, /* widen. */
1558 0, /* narrow. */
1559 0, /* toint. */
1560 0, /* fromint. */
1561 0 /* roundint. */
1563 /* FP DFmode */
1565 COSTS_N_INSNS (15), /* div. */
1566 COSTS_N_INSNS (5), /* mult. */
1567 COSTS_N_INSNS (7), /* mult_addsub. */
1568 COSTS_N_INSNS (7), /* fma. */
1569 COSTS_N_INSNS (3), /* addsub. */
1570 0, /* fpconst. */
1571 0, /* neg. */
1572 0, /* compare. */
1573 0, /* widen. */
1574 0, /* narrow. */
1575 0, /* toint. */
1576 0, /* fromint. */
1577 0 /* roundint. */
1580 /* Vector */
1582 COSTS_N_INSNS (1) /* alu. */
1586 const struct tune_params arm_slowmul_tune =
1588 arm_slowmul_rtx_costs,
1589 NULL,
1590 NULL, /* Sched adj cost. */
1591 3, /* Constant limit. */
1592 5, /* Max cond insns. */
1593 ARM_PREFETCH_NOT_BENEFICIAL,
1594 true, /* Prefer constant pool. */
1595 arm_default_branch_cost,
1596 false, /* Prefer LDRD/STRD. */
1597 {true, true}, /* Prefer non short circuit. */
1598 &arm_default_vec_cost, /* Vectorizer costs. */
1599 false, /* Prefer Neon for 64-bit bitops. */
1600 false, false /* Prefer 32-bit encodings. */
1603 const struct tune_params arm_fastmul_tune =
1605 arm_fastmul_rtx_costs,
1606 NULL,
1607 NULL, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 5, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL,
1611 true, /* Prefer constant pool. */
1612 arm_default_branch_cost,
1613 false, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost, /* Vectorizer costs. */
1616 false, /* Prefer Neon for 64-bit bitops. */
1617 false, false /* Prefer 32-bit encodings. */
1620 /* StrongARM has early execution of branches, so a sequence that is worth
1621 skipping is shorter. Set max_insns_skipped to a lower value. */
1623 const struct tune_params arm_strongarm_tune =
1625 arm_fastmul_rtx_costs,
1626 NULL,
1627 NULL, /* Sched adj cost. */
1628 1, /* Constant limit. */
1629 3, /* Max cond insns. */
1630 ARM_PREFETCH_NOT_BENEFICIAL,
1631 true, /* Prefer constant pool. */
1632 arm_default_branch_cost,
1633 false, /* Prefer LDRD/STRD. */
1634 {true, true}, /* Prefer non short circuit. */
1635 &arm_default_vec_cost, /* Vectorizer costs. */
1636 false, /* Prefer Neon for 64-bit bitops. */
1637 false, false /* Prefer 32-bit encodings. */
1640 const struct tune_params arm_xscale_tune =
1642 arm_xscale_rtx_costs,
1643 NULL,
1644 xscale_sched_adjust_cost,
1645 2, /* Constant limit. */
1646 3, /* Max cond insns. */
1647 ARM_PREFETCH_NOT_BENEFICIAL,
1648 true, /* Prefer constant pool. */
1649 arm_default_branch_cost,
1650 false, /* Prefer LDRD/STRD. */
1651 {true, true}, /* Prefer non short circuit. */
1652 &arm_default_vec_cost, /* Vectorizer costs. */
1653 false, /* Prefer Neon for 64-bits bitops. */
1654 false, false /* Prefer 32-bit encodings. */
1657 const struct tune_params arm_9e_tune =
1659 arm_9e_rtx_costs,
1660 NULL,
1661 NULL, /* Sched adj cost. */
1662 1, /* Constant limit. */
1663 5, /* Max cond insns. */
1664 ARM_PREFETCH_NOT_BENEFICIAL,
1665 true, /* Prefer constant pool. */
1666 arm_default_branch_cost,
1667 false, /* Prefer LDRD/STRD. */
1668 {true, true}, /* Prefer non short circuit. */
1669 &arm_default_vec_cost, /* Vectorizer costs. */
1670 false, /* Prefer Neon for 64-bits bitops. */
1671 false, false /* Prefer 32-bit encodings. */
1674 const struct tune_params arm_v6t2_tune =
1676 arm_9e_rtx_costs,
1677 NULL,
1678 NULL, /* Sched adj cost. */
1679 1, /* Constant limit. */
1680 5, /* Max cond insns. */
1681 ARM_PREFETCH_NOT_BENEFICIAL,
1682 false, /* Prefer constant pool. */
1683 arm_default_branch_cost,
1684 false, /* Prefer LDRD/STRD. */
1685 {true, true}, /* Prefer non short circuit. */
1686 &arm_default_vec_cost, /* Vectorizer costs. */
1687 false, /* Prefer Neon for 64-bits bitops. */
1688 false, false /* Prefer 32-bit encodings. */
1691 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1692 const struct tune_params arm_cortex_tune =
1694 arm_9e_rtx_costs,
1695 &generic_extra_costs,
1696 NULL, /* Sched adj cost. */
1697 1, /* Constant limit. */
1698 5, /* Max cond insns. */
1699 ARM_PREFETCH_NOT_BENEFICIAL,
1700 false, /* Prefer constant pool. */
1701 arm_default_branch_cost,
1702 false, /* Prefer LDRD/STRD. */
1703 {true, true}, /* Prefer non short circuit. */
1704 &arm_default_vec_cost, /* Vectorizer costs. */
1705 false, /* Prefer Neon for 64-bits bitops. */
1706 false, false /* Prefer 32-bit encodings. */
1709 const struct tune_params arm_cortex_a8_tune =
1711 arm_9e_rtx_costs,
1712 &cortexa8_extra_costs,
1713 NULL, /* Sched adj cost. */
1714 1, /* Constant limit. */
1715 5, /* Max cond insns. */
1716 ARM_PREFETCH_NOT_BENEFICIAL,
1717 false, /* Prefer constant pool. */
1718 arm_default_branch_cost,
1719 false, /* Prefer LDRD/STRD. */
1720 {true, true}, /* Prefer non short circuit. */
1721 &arm_default_vec_cost, /* Vectorizer costs. */
1722 false, /* Prefer Neon for 64-bits bitops. */
1723 false, false /* Prefer 32-bit encodings. */
1726 const struct tune_params arm_cortex_a7_tune =
1728 arm_9e_rtx_costs,
1729 &cortexa7_extra_costs,
1730 NULL,
1731 1, /* Constant limit. */
1732 5, /* Max cond insns. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 false, /* Prefer constant pool. */
1735 arm_default_branch_cost,
1736 false, /* Prefer LDRD/STRD. */
1737 {true, true}, /* Prefer non short circuit. */
1738 &arm_default_vec_cost, /* Vectorizer costs. */
1739 false, /* Prefer Neon for 64-bits bitops. */
1740 false, false /* Prefer 32-bit encodings. */
1743 const struct tune_params arm_cortex_a15_tune =
1745 arm_9e_rtx_costs,
1746 &cortexa15_extra_costs,
1747 NULL, /* Sched adj cost. */
1748 1, /* Constant limit. */
1749 2, /* Max cond insns. */
1750 ARM_PREFETCH_NOT_BENEFICIAL,
1751 false, /* Prefer constant pool. */
1752 arm_default_branch_cost,
1753 true, /* Prefer LDRD/STRD. */
1754 {true, true}, /* Prefer non short circuit. */
1755 &arm_default_vec_cost, /* Vectorizer costs. */
1756 false, /* Prefer Neon for 64-bits bitops. */
1757 true, true /* Prefer 32-bit encodings. */
1760 const struct tune_params arm_cortex_a53_tune =
1762 arm_9e_rtx_costs,
1763 &cortexa53_extra_costs,
1764 NULL, /* Scheduler cost adjustment. */
1765 1, /* Constant limit. */
1766 5, /* Max cond insns. */
1767 ARM_PREFETCH_NOT_BENEFICIAL,
1768 false, /* Prefer constant pool. */
1769 arm_default_branch_cost,
1770 false, /* Prefer LDRD/STRD. */
1771 {true, true}, /* Prefer non short circuit. */
1772 &arm_default_vec_cost, /* Vectorizer costs. */
1773 false, /* Prefer Neon for 64-bits bitops. */
1774 false, false /* Prefer 32-bit encodings. */
1777 const struct tune_params arm_cortex_a57_tune =
1779 arm_9e_rtx_costs,
1780 &cortexa57_extra_costs,
1781 NULL, /* Scheduler cost adjustment. */
1782 1, /* Constant limit. */
1783 2, /* Max cond insns. */
1784 ARM_PREFETCH_NOT_BENEFICIAL,
1785 false, /* Prefer constant pool. */
1786 arm_default_branch_cost,
1787 true, /* Prefer LDRD/STRD. */
1788 {true, true}, /* Prefer non short circuit. */
1789 &arm_default_vec_cost, /* Vectorizer costs. */
1790 false, /* Prefer Neon for 64-bits bitops. */
1791 true, true /* Prefer 32-bit encodings. */
1794 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1795 less appealing. Set max_insns_skipped to a low value. */
1797 const struct tune_params arm_cortex_a5_tune =
1799 arm_9e_rtx_costs,
1800 NULL,
1801 NULL, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 1, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 false, /* Prefer constant pool. */
1806 arm_cortex_a5_branch_cost,
1807 false, /* Prefer LDRD/STRD. */
1808 {false, false}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false /* Prefer 32-bit encodings. */
1814 const struct tune_params arm_cortex_a9_tune =
1816 arm_9e_rtx_costs,
1817 &cortexa9_extra_costs,
1818 cortex_a9_sched_adjust_cost,
1819 1, /* Constant limit. */
1820 5, /* Max cond insns. */
1821 ARM_PREFETCH_BENEFICIAL(4,32,32),
1822 false, /* Prefer constant pool. */
1823 arm_default_branch_cost,
1824 false, /* Prefer LDRD/STRD. */
1825 {true, true}, /* Prefer non short circuit. */
1826 &arm_default_vec_cost, /* Vectorizer costs. */
1827 false, /* Prefer Neon for 64-bits bitops. */
1828 false, false /* Prefer 32-bit encodings. */
1831 const struct tune_params arm_cortex_a12_tune =
1833 arm_9e_rtx_costs,
1834 &cortexa12_extra_costs,
1835 NULL,
1836 1, /* Constant limit. */
1837 5, /* Max cond insns. */
1838 ARM_PREFETCH_BENEFICIAL(4,32,32),
1839 false, /* Prefer constant pool. */
1840 arm_default_branch_cost,
1841 true, /* Prefer LDRD/STRD. */
1842 {true, true}, /* Prefer non short circuit. */
1843 &arm_default_vec_cost, /* Vectorizer costs. */
1844 false, /* Prefer Neon for 64-bits bitops. */
1845 false, false /* Prefer 32-bit encodings. */
1848 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1849 single cycle, so materializing a 32-bit constant that way costs two cycles. An
1850 LDR from the constant pool also takes two cycles to execute, but mildly
1851 increases pipelining opportunity (consecutive loads/stores can be pipelined
1852 together, saving one cycle), and may also improve icache utilisation. Hence we
1853 prefer the constant pool for such processors. */
1855 const struct tune_params arm_v7m_tune =
1857 arm_9e_rtx_costs,
1858 &v7m_extra_costs,
1859 NULL, /* Sched adj cost. */
1860 1, /* Constant limit. */
1861 2, /* Max cond insns. */
1862 ARM_PREFETCH_NOT_BENEFICIAL,
1863 true, /* Prefer constant pool. */
1864 arm_cortex_m_branch_cost,
1865 false, /* Prefer LDRD/STRD. */
1866 {false, false}, /* Prefer non short circuit. */
1867 &arm_default_vec_cost, /* Vectorizer costs. */
1868 false, /* Prefer Neon for 64-bits bitops. */
1869 false, false /* Prefer 32-bit encodings. */
1872 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1873 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1874 const struct tune_params arm_v6m_tune =
1876 arm_9e_rtx_costs,
1877 NULL,
1878 NULL, /* Sched adj cost. */
1879 1, /* Constant limit. */
1880 5, /* Max cond insns. */
1881 ARM_PREFETCH_NOT_BENEFICIAL,
1882 false, /* Prefer constant pool. */
1883 arm_default_branch_cost,
1884 false, /* Prefer LDRD/STRD. */
1885 {false, false}, /* Prefer non short circuit. */
1886 &arm_default_vec_cost, /* Vectorizer costs. */
1887 false, /* Prefer Neon for 64-bits bitops. */
1888 false, false /* Prefer 32-bit encodings. */
1891 const struct tune_params arm_fa726te_tune =
1893 arm_9e_rtx_costs,
1894 NULL,
1895 fa726te_sched_adjust_cost,
1896 1, /* Constant limit. */
1897 5, /* Max cond insns. */
1898 ARM_PREFETCH_NOT_BENEFICIAL,
1899 true, /* Prefer constant pool. */
1900 arm_default_branch_cost,
1901 false, /* Prefer LDRD/STRD. */
1902 {true, true}, /* Prefer non short circuit. */
1903 &arm_default_vec_cost, /* Vectorizer costs. */
1904 false, /* Prefer Neon for 64-bits bitops. */
1905 false, false /* Prefer 32-bit encodings. */
1909 /* Not all of these give usefully different compilation alternatives,
1910 but there is no simple way of generalizing them. */
1911 static const struct processors all_cores[] =
1913 /* ARM Cores */
1914 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1915 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1916 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1917 #include "arm-cores.def"
1918 #undef ARM_CORE
1919 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1922 static const struct processors all_architectures[] =
1924 /* ARM Architectures */
1925 /* We don't specify tuning costs here as it will be figured out
1926 from the core. */
1928 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1929 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1930 #include "arm-arches.def"
1931 #undef ARM_ARCH
1932 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1936 /* These are populated as command-line arguments are processed, or NULL
1937 if not specified. */
1938 static const struct processors *arm_selected_arch;
1939 static const struct processors *arm_selected_cpu;
1940 static const struct processors *arm_selected_tune;
1942 /* The name of the preprocessor macro to define for this architecture. */
1944 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1946 /* Available values for -mfpu=. */
1948 static const struct arm_fpu_desc all_fpus[] =
1950 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1951 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1952 #include "arm-fpus.def"
1953 #undef ARM_FPU
1957 /* Supported TLS relocations. */
1959 enum tls_reloc {
1960 TLS_GD32,
1961 TLS_LDM32,
1962 TLS_LDO32,
1963 TLS_IE32,
1964 TLS_LE32,
1965 TLS_DESCSEQ /* GNU scheme */
1968 /* The maximum number of insns to be used when loading a constant. */
1969 inline static int
1970 arm_constant_limit (bool size_p)
1972 return size_p ? 1 : current_tune->constant_limit;
1975 /* Emit an insn that's a simple single-set. Both the operands must be known
1976 to be valid. */
1977 inline static rtx
1978 emit_set_insn (rtx x, rtx y)
1980 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1983 /* Return the number of bits set in VALUE. */
1984 static unsigned
1985 bit_count (unsigned long value)
1987 unsigned long count = 0;
1989 while (value)
1991 count++;
1992 value &= value - 1; /* Clear the least-significant set bit. */
1995 return count;
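/* For example, bit_count (0x29) iterates 0x29 -> 0x28 -> 0x20 -> 0,
   clearing one set bit per pass, and returns 3.  */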
1998 typedef struct
2000 enum machine_mode mode;
2001 const char *name;
2002 } arm_fixed_mode_set;
2004 /* A small helper for setting fixed-point library libfuncs. */
2006 static void
2007 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2008 const char *funcname, const char *modename,
2009 int num_suffix)
2011 char buffer[50];
2013 if (num_suffix == 0)
2014 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2015 else
2016 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2018 set_optab_libfunc (optable, mode, buffer);
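/* For example, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers "__gnu_addsq3"; with a zero num_suffix the trailing digit is
   omitted, giving "__gnu_addsq".  */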
2021 static void
2022 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2023 enum machine_mode from, const char *funcname,
2024 const char *toname, const char *fromname)
2026 char buffer[50];
2027 const char *maybe_suffix_2 = "";
2029 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2030 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2031 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2032 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2033 maybe_suffix_2 = "2";
2035 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2036 maybe_suffix_2);
2038 set_conv_libfunc (optable, to, from, buffer);
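/* For example, a conversion from SQmode to DQmode (both signed fractional
   modes) is registered as "__gnu_fractsqdq2", while a conversion from SQmode
   to SImode (not a fixed-point mode) takes no "2" suffix and becomes
   "__gnu_fractsqsi".  */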
2041 /* Set up library functions unique to ARM. */
2043 static void
2044 arm_init_libfuncs (void)
2046 /* For Linux, we have access to kernel support for atomic operations. */
2047 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2048 init_sync_libfuncs (2 * UNITS_PER_WORD);
2050 /* There are no special library functions unless we are using the
2051 ARM BPABI. */
2052 if (!TARGET_BPABI)
2053 return;
2055 /* The functions below are described in Section 4 of the "Run-Time
2056 ABI for the ARM architecture", Version 1.0. */
2058 /* Double-precision floating-point arithmetic. Table 2. */
2059 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2060 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2061 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2062 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2063 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2065 /* Double-precision comparisons. Table 3. */
2066 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2067 set_optab_libfunc (ne_optab, DFmode, NULL);
2068 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2069 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2070 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2071 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2072 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2074 /* Single-precision floating-point arithmetic. Table 4. */
2075 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2076 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2077 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2078 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2079 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2081 /* Single-precision comparisons. Table 5. */
2082 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2083 set_optab_libfunc (ne_optab, SFmode, NULL);
2084 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2085 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2086 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2087 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2088 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2090 /* Floating-point to integer conversions. Table 6. */
2091 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2092 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2093 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2094 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2095 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2096 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2097 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2098 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2100 /* Conversions between floating types. Table 7. */
2101 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2102 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2104 /* Integer to floating-point conversions. Table 8. */
2105 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2106 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2107 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2108 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2109 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2110 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2111 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2112 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2114 /* Long long. Table 9. */
2115 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2116 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2117 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2118 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2119 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2120 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2121 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2122 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2124 /* Integer (32/32->32) division. \S 4.3.1. */
2125 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2126 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2128 /* The divmod functions are designed so that they can be used for
2129 plain division, even though they return both the quotient and the
2130 remainder. The quotient is returned in the usual location (i.e.,
2131 r0 for SImode, {r0, r1} for DImode), just as would be expected
2132 for an ordinary division routine. Because the AAPCS calling
2133 conventions specify that all of { r0, r1, r2, r3 } are
2134 call-clobbered registers, there is no need to tell the compiler
2135 explicitly that those registers are clobbered by these
2136 routines. */
2137 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2138 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2140 /* For SImode division the ABI provides div-without-mod routines,
2141 which are faster. */
2142 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2143 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2145 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2146 divmod libcalls instead. */
2147 set_optab_libfunc (smod_optab, DImode, NULL);
2148 set_optab_libfunc (umod_optab, DImode, NULL);
2149 set_optab_libfunc (smod_optab, SImode, NULL);
2150 set_optab_libfunc (umod_optab, SImode, NULL);
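/* So, for example, a 32-bit signed "a % b" expands to a call to
   __aeabi_idivmod; the quotient comes back in r0 and the remainder in r1,
   and only r1 is consumed for the result.  */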
2152 /* Half-precision float operations. The compiler handles all operations
2153 with NULL libfuncs by converting to SFmode. */
2154 switch (arm_fp16_format)
2156 case ARM_FP16_FORMAT_IEEE:
2157 case ARM_FP16_FORMAT_ALTERNATIVE:
2159 /* Conversions. */
2160 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2161 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2162 ? "__gnu_f2h_ieee"
2163 : "__gnu_f2h_alternative"));
2164 set_conv_libfunc (sext_optab, SFmode, HFmode,
2165 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2166 ? "__gnu_h2f_ieee"
2167 : "__gnu_h2f_alternative"));
2169 /* Arithmetic. */
2170 set_optab_libfunc (add_optab, HFmode, NULL);
2171 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2172 set_optab_libfunc (smul_optab, HFmode, NULL);
2173 set_optab_libfunc (neg_optab, HFmode, NULL);
2174 set_optab_libfunc (sub_optab, HFmode, NULL);
2176 /* Comparisons. */
2177 set_optab_libfunc (eq_optab, HFmode, NULL);
2178 set_optab_libfunc (ne_optab, HFmode, NULL);
2179 set_optab_libfunc (lt_optab, HFmode, NULL);
2180 set_optab_libfunc (le_optab, HFmode, NULL);
2181 set_optab_libfunc (ge_optab, HFmode, NULL);
2182 set_optab_libfunc (gt_optab, HFmode, NULL);
2183 set_optab_libfunc (unord_optab, HFmode, NULL);
2184 break;
2186 default:
2187 break;
2190 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2192 const arm_fixed_mode_set fixed_arith_modes[] =
2194 { QQmode, "qq" },
2195 { UQQmode, "uqq" },
2196 { HQmode, "hq" },
2197 { UHQmode, "uhq" },
2198 { SQmode, "sq" },
2199 { USQmode, "usq" },
2200 { DQmode, "dq" },
2201 { UDQmode, "udq" },
2202 { TQmode, "tq" },
2203 { UTQmode, "utq" },
2204 { HAmode, "ha" },
2205 { UHAmode, "uha" },
2206 { SAmode, "sa" },
2207 { USAmode, "usa" },
2208 { DAmode, "da" },
2209 { UDAmode, "uda" },
2210 { TAmode, "ta" },
2211 { UTAmode, "uta" }
2213 const arm_fixed_mode_set fixed_conv_modes[] =
2215 { QQmode, "qq" },
2216 { UQQmode, "uqq" },
2217 { HQmode, "hq" },
2218 { UHQmode, "uhq" },
2219 { SQmode, "sq" },
2220 { USQmode, "usq" },
2221 { DQmode, "dq" },
2222 { UDQmode, "udq" },
2223 { TQmode, "tq" },
2224 { UTQmode, "utq" },
2225 { HAmode, "ha" },
2226 { UHAmode, "uha" },
2227 { SAmode, "sa" },
2228 { USAmode, "usa" },
2229 { DAmode, "da" },
2230 { UDAmode, "uda" },
2231 { TAmode, "ta" },
2232 { UTAmode, "uta" },
2233 { QImode, "qi" },
2234 { HImode, "hi" },
2235 { SImode, "si" },
2236 { DImode, "di" },
2237 { TImode, "ti" },
2238 { SFmode, "sf" },
2239 { DFmode, "df" }
2241 unsigned int i, j;
2243 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2245 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2246 "add", fixed_arith_modes[i].name, 3);
2247 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2248 "ssadd", fixed_arith_modes[i].name, 3);
2249 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2250 "usadd", fixed_arith_modes[i].name, 3);
2251 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2252 "sub", fixed_arith_modes[i].name, 3);
2253 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2254 "sssub", fixed_arith_modes[i].name, 3);
2255 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2256 "ussub", fixed_arith_modes[i].name, 3);
2257 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2258 "mul", fixed_arith_modes[i].name, 3);
2259 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2260 "ssmul", fixed_arith_modes[i].name, 3);
2261 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2262 "usmul", fixed_arith_modes[i].name, 3);
2263 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2264 "div", fixed_arith_modes[i].name, 3);
2265 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2266 "udiv", fixed_arith_modes[i].name, 3);
2267 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2268 "ssdiv", fixed_arith_modes[i].name, 3);
2269 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2270 "usdiv", fixed_arith_modes[i].name, 3);
2271 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2272 "neg", fixed_arith_modes[i].name, 2);
2273 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2274 "ssneg", fixed_arith_modes[i].name, 2);
2275 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2276 "usneg", fixed_arith_modes[i].name, 2);
2277 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2278 "ashl", fixed_arith_modes[i].name, 3);
2279 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2280 "ashr", fixed_arith_modes[i].name, 3);
2281 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2282 "lshr", fixed_arith_modes[i].name, 3);
2283 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2284 "ssashl", fixed_arith_modes[i].name, 3);
2285 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2286 "usashl", fixed_arith_modes[i].name, 3);
2287 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2288 "cmp", fixed_arith_modes[i].name, 2);
2291 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2292 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2294 if (i == j
2295 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2296 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2297 continue;
2299 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2300 fixed_conv_modes[j].mode, "fract",
2301 fixed_conv_modes[i].name,
2302 fixed_conv_modes[j].name);
2303 arm_set_fixed_conv_libfunc (satfract_optab,
2304 fixed_conv_modes[i].mode,
2305 fixed_conv_modes[j].mode, "satfract",
2306 fixed_conv_modes[i].name,
2307 fixed_conv_modes[j].name);
2308 arm_set_fixed_conv_libfunc (fractuns_optab,
2309 fixed_conv_modes[i].mode,
2310 fixed_conv_modes[j].mode, "fractuns",
2311 fixed_conv_modes[i].name,
2312 fixed_conv_modes[j].name);
2313 arm_set_fixed_conv_libfunc (satfractuns_optab,
2314 fixed_conv_modes[i].mode,
2315 fixed_conv_modes[j].mode, "satfractuns",
2316 fixed_conv_modes[i].name,
2317 fixed_conv_modes[j].name);
2321 if (TARGET_AAPCS_BASED)
2322 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2325 /* On AAPCS systems, this is the "struct __va_list". */
2326 static GTY(()) tree va_list_type;
2328 /* Return the type to use as __builtin_va_list. */
2329 static tree
2330 arm_build_builtin_va_list (void)
2332 tree va_list_name;
2333 tree ap_field;
2335 if (!TARGET_AAPCS_BASED)
2336 return std_build_builtin_va_list ();
2338 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2339 defined as:
2341 struct __va_list
2343 void *__ap;
2346 The C Library ABI further reinforces this definition in \S
2347 4.1.
2349 We must follow this definition exactly. The structure tag
2350 name is visible in C++ mangled names, and thus forms a part
2351 of the ABI. The field name may be used by people who
2352 #include <stdarg.h>. */
2353 /* Create the type. */
2354 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2355 /* Give it the required name. */
2356 va_list_name = build_decl (BUILTINS_LOCATION,
2357 TYPE_DECL,
2358 get_identifier ("__va_list"),
2359 va_list_type);
2360 DECL_ARTIFICIAL (va_list_name) = 1;
2361 TYPE_NAME (va_list_type) = va_list_name;
2362 TYPE_STUB_DECL (va_list_type) = va_list_name;
2363 /* Create the __ap field. */
2364 ap_field = build_decl (BUILTINS_LOCATION,
2365 FIELD_DECL,
2366 get_identifier ("__ap"),
2367 ptr_type_node);
2368 DECL_ARTIFICIAL (ap_field) = 1;
2369 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2370 TYPE_FIELDS (va_list_type) = ap_field;
2371 /* Compute its layout. */
2372 layout_type (va_list_type);
2374 return va_list_type;
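/* In practice this means that, on AAPCS targets, va_list is the one-field
   record built above rather than a plain pointer; a C++ parameter of this
   type is therefore mangled via the "__va_list" tag (typically appearing
   as "St9__va_list") rather than as "void *".  */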
2377 /* Return an expression of type "void *" pointing to the next
2378 available argument in a variable-argument list. VALIST is the
2379 user-level va_list object, of type __builtin_va_list. */
2380 static tree
2381 arm_extract_valist_ptr (tree valist)
2383 if (TREE_TYPE (valist) == error_mark_node)
2384 return error_mark_node;
2386 /* On an AAPCS target, the pointer is stored within "struct
2387 __va_list". */
2388 if (TARGET_AAPCS_BASED)
2390 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2391 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2392 valist, ap_field, NULL_TREE);
2395 return valist;
2398 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2399 static void
2400 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2402 valist = arm_extract_valist_ptr (valist);
2403 std_expand_builtin_va_start (valist, nextarg);
2406 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2407 static tree
2408 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2409 gimple_seq *post_p)
2411 valist = arm_extract_valist_ptr (valist);
2412 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2415 /* Fix up any incompatible options that the user has specified. */
2416 static void
2417 arm_option_override (void)
2419 if (global_options_set.x_arm_arch_option)
2420 arm_selected_arch = &all_architectures[arm_arch_option];
2422 if (global_options_set.x_arm_cpu_option)
2424 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2425 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2428 if (global_options_set.x_arm_tune_option)
2429 arm_selected_tune = &all_cores[(int) arm_tune_option];
2431 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2432 SUBTARGET_OVERRIDE_OPTIONS;
2433 #endif
2435 if (arm_selected_arch)
2437 if (arm_selected_cpu)
2439 /* Check for conflict between mcpu and march. */
2440 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2442 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2443 arm_selected_cpu->name, arm_selected_arch->name);
2444 /* -march wins for code generation.
2445 -mcpu wins for default tuning. */
2446 if (!arm_selected_tune)
2447 arm_selected_tune = arm_selected_cpu;
2449 arm_selected_cpu = arm_selected_arch;
2451 else
2452 /* -mcpu wins. */
2453 arm_selected_arch = NULL;
2455 else
2456 /* Pick a CPU based on the architecture. */
2457 arm_selected_cpu = arm_selected_arch;
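/* For example, "-mcpu=arm926ej-s -march=armv7-a" triggers the warning
   above: code is generated for armv7-a, while the arm926ej-s entry is
   kept only to select the default tuning.  */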
2460 /* If the user did not specify a processor, choose one for them. */
2461 if (!arm_selected_cpu)
2463 const struct processors * sel;
2464 unsigned int sought;
2466 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2467 if (!arm_selected_cpu->name)
2469 #ifdef SUBTARGET_CPU_DEFAULT
2470 /* Use the subtarget default CPU if none was specified by
2471 configure. */
2472 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2473 #endif
2474 /* Default to ARM6. */
2475 if (!arm_selected_cpu->name)
2476 arm_selected_cpu = &all_cores[arm6];
2479 sel = arm_selected_cpu;
2480 insn_flags = sel->flags;
2482 /* Now check to see if the user has specified some command line
2483 switches that require certain abilities from the CPU. */
2484 sought = 0;
2486 if (TARGET_INTERWORK || TARGET_THUMB)
2488 sought |= (FL_THUMB | FL_MODE32);
2490 /* There are no ARM processors that support both APCS-26 and
2491 interworking. Therefore we force FL_MODE26 to be removed
2492 from insn_flags here (if it was set), so that the search
2493 below will always be able to find a compatible processor. */
2494 insn_flags &= ~FL_MODE26;
2497 if (sought != 0 && ((sought & insn_flags) != sought))
2499 /* Try to locate a CPU type that supports all of the abilities
2500 of the default CPU, plus the extra abilities requested by
2501 the user. */
2502 for (sel = all_cores; sel->name != NULL; sel++)
2503 if ((sel->flags & sought) == (sought | insn_flags))
2504 break;
2506 if (sel->name == NULL)
2508 unsigned current_bit_count = 0;
2509 const struct processors * best_fit = NULL;
2511 /* Ideally we would like to issue an error message here
2512 saying that it was not possible to find a CPU compatible
2513 with the default CPU, but which also supports the command
2514 line options specified by the programmer, and so they
2515 ought to use the -mcpu=<name> command line option to
2516 override the default CPU type.
2518 If we cannot find a cpu that has both the
2519 characteristics of the default cpu and the given
2520 command line options we scan the array again looking
2521 for a best match. */
2522 for (sel = all_cores; sel->name != NULL; sel++)
2523 if ((sel->flags & sought) == sought)
2525 unsigned count;
2527 count = bit_count (sel->flags & insn_flags);
2529 if (count >= current_bit_count)
2531 best_fit = sel;
2532 current_bit_count = count;
2536 gcc_assert (best_fit);
2537 sel = best_fit;
2540 arm_selected_cpu = sel;
2544 gcc_assert (arm_selected_cpu);
2545 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2546 if (!arm_selected_tune)
2547 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2549 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2550 insn_flags = arm_selected_cpu->flags;
2551 arm_base_arch = arm_selected_cpu->base_arch;
2553 arm_tune = arm_selected_tune->core;
2554 tune_flags = arm_selected_tune->flags;
2555 current_tune = arm_selected_tune->tune;
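/* For example, selecting -march=armv7-a gives an "arch" string of "7A",
   so arm_arch_name above becomes "__ARM_ARCH_7A__", which is later handed
   to the preprocessor as a built-in macro.  */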
2557 /* Make sure that the processor choice does not conflict with any of the
2558 other command line choices. */
2559 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2560 error ("target CPU does not support ARM mode");
2562 /* BPABI targets use linker tricks to allow interworking on cores
2563 without thumb support. */
2564 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2566 warning (0, "target CPU does not support interworking" );
2567 target_flags &= ~MASK_INTERWORK;
2570 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2572 warning (0, "target CPU does not support THUMB instructions");
2573 target_flags &= ~MASK_THUMB;
2576 if (TARGET_APCS_FRAME && TARGET_THUMB)
2578 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2579 target_flags &= ~MASK_APCS_FRAME;
2582 /* Callee super interworking implies thumb interworking. Adding
2583 this to the flags here simplifies the logic elsewhere. */
2584 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2585 target_flags |= MASK_INTERWORK;
2587 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2588 from here where no function is being compiled currently. */
2589 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2590 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2592 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2593 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2595 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2597 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2598 target_flags |= MASK_APCS_FRAME;
2601 if (TARGET_POKE_FUNCTION_NAME)
2602 target_flags |= MASK_APCS_FRAME;
2604 if (TARGET_APCS_REENT && flag_pic)
2605 error ("-fpic and -mapcs-reent are incompatible");
2607 if (TARGET_APCS_REENT)
2608 warning (0, "APCS reentrant code not supported. Ignored");
2610 /* If this target is normally configured to use APCS frames, warn if they
2611 are turned off and debugging is turned on. */
2612 if (TARGET_ARM
2613 && write_symbols != NO_DEBUG
2614 && !TARGET_APCS_FRAME
2615 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2616 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2618 if (TARGET_APCS_FLOAT)
2619 warning (0, "passing floating point arguments in fp regs not yet supported");
2621 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2622 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2623 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2624 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2625 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2626 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2627 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2628 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2629 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2630 arm_arch6m = arm_arch6 && !arm_arch_notm;
2631 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2632 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2633 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2634 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2635 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2637 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2638 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2639 thumb_code = TARGET_ARM == 0;
2640 thumb1_code = TARGET_THUMB1 != 0;
2641 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2642 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2643 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2644 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2645 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2646 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2647 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2648 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2649 if (arm_restrict_it == 2)
2650 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2652 if (!TARGET_THUMB2)
2653 arm_restrict_it = 0;
2655 /* If we are not using the default (ARM mode) section anchor offset
2656 ranges, then set the correct ranges now. */
2657 if (TARGET_THUMB1)
2659 /* Thumb-1 LDR instructions cannot have negative offsets.
2660 Permissible positive offset ranges are 5-bit (for byte loads),
2661 6-bit (for halfword loads), or 7-bit (for word loads).
2662 Empirical results suggest a 7-bit anchor range gives the best
2663 overall code size. */
2664 targetm.min_anchor_offset = 0;
2665 targetm.max_anchor_offset = 127;
2667 else if (TARGET_THUMB2)
2669 /* The minimum is set such that the total size of the block
2670 for a particular anchor is 248 + 1 + 4095 bytes, which is
2671 divisible by eight, ensuring natural spacing of anchors. */
2672 targetm.min_anchor_offset = -248;
2673 targetm.max_anchor_offset = 4095;
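/* Sanity check on the Thumb-2 numbers above: 248 + 1 + 4095 = 4344 bytes,
   and 4344 = 8 * 543, so anchor blocks keep the natural spacing the
   comment requires.  */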
2676 /* V5 code we generate is completely interworking capable, so we turn off
2677 TARGET_INTERWORK here to avoid many tests later on. */
2679 /* XXX However, we must pass the right pre-processor defines to CPP
2680 or GLD can get confused. This is a hack. */
2681 if (TARGET_INTERWORK)
2682 arm_cpp_interwork = 1;
2684 if (arm_arch5)
2685 target_flags &= ~MASK_INTERWORK;
2687 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2688 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2690 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2691 error ("iwmmxt abi requires an iwmmxt capable cpu");
2693 if (!global_options_set.x_arm_fpu_index)
2695 const char *target_fpu_name;
2696 bool ok;
2698 #ifdef FPUTYPE_DEFAULT
2699 target_fpu_name = FPUTYPE_DEFAULT;
2700 #else
2701 target_fpu_name = "vfp";
2702 #endif
2704 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2705 CL_TARGET);
2706 gcc_assert (ok);
2709 arm_fpu_desc = &all_fpus[arm_fpu_index];
2711 switch (arm_fpu_desc->model)
2713 case ARM_FP_MODEL_VFP:
2714 arm_fpu_attr = FPU_VFP;
2715 break;
2717 default:
2718 gcc_unreachable();
2721 if (TARGET_AAPCS_BASED)
2723 if (TARGET_CALLER_INTERWORKING)
2724 error ("AAPCS does not support -mcaller-super-interworking");
2725 else
2726 if (TARGET_CALLEE_INTERWORKING)
2727 error ("AAPCS does not support -mcallee-super-interworking");
2730 /* iWMMXt and NEON are incompatible. */
2731 if (TARGET_IWMMXT && TARGET_NEON)
2732 error ("iWMMXt and NEON are incompatible");
2734 /* iWMMXt unsupported under Thumb mode. */
2735 if (TARGET_THUMB && TARGET_IWMMXT)
2736 error ("iWMMXt unsupported under Thumb mode");
2738 /* __fp16 support currently assumes the core has ldrh. */
2739 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2740 sorry ("__fp16 and no ldrh");
2742 /* If soft-float is specified then don't use FPU. */
2743 if (TARGET_SOFT_FLOAT)
2744 arm_fpu_attr = FPU_NONE;
2746 if (TARGET_AAPCS_BASED)
2748 if (arm_abi == ARM_ABI_IWMMXT)
2749 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2750 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2751 && TARGET_HARD_FLOAT
2752 && TARGET_VFP)
2753 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2754 else
2755 arm_pcs_default = ARM_PCS_AAPCS;
2757 else
2759 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2760 sorry ("-mfloat-abi=hard and VFP");
2762 if (arm_abi == ARM_ABI_APCS)
2763 arm_pcs_default = ARM_PCS_APCS;
2764 else
2765 arm_pcs_default = ARM_PCS_ATPCS;
2768 /* For arm2/3 there is no need to do any scheduling if we are doing
2769 software floating-point. */
2770 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2771 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2773 /* Use the cp15 method if it is available. */
2774 if (target_thread_pointer == TP_AUTO)
2776 if (arm_arch6k && !TARGET_THUMB1)
2777 target_thread_pointer = TP_CP15;
2778 else
2779 target_thread_pointer = TP_SOFT;
2782 if (TARGET_HARD_TP && TARGET_THUMB1)
2783 error ("can not use -mtp=cp15 with 16-bit Thumb");
2785 /* Override the default structure alignment for AAPCS ABI. */
2786 if (!global_options_set.x_arm_structure_size_boundary)
2788 if (TARGET_AAPCS_BASED)
2789 arm_structure_size_boundary = 8;
2791 else
2793 if (arm_structure_size_boundary != 8
2794 && arm_structure_size_boundary != 32
2795 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2797 if (ARM_DOUBLEWORD_ALIGN)
2798 warning (0,
2799 "structure size boundary can only be set to 8, 32 or 64");
2800 else
2801 warning (0, "structure size boundary can only be set to 8 or 32");
2802 arm_structure_size_boundary
2803 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2807 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2809 error ("RTP PIC is incompatible with Thumb");
2810 flag_pic = 0;
2813 /* If stack checking is disabled, we can use r10 as the PIC register,
2814 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2815 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2817 if (TARGET_VXWORKS_RTP)
2818 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2819 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2822 if (flag_pic && TARGET_VXWORKS_RTP)
2823 arm_pic_register = 9;
2825 if (arm_pic_register_string != NULL)
2827 int pic_register = decode_reg_name (arm_pic_register_string);
2829 if (!flag_pic)
2830 warning (0, "-mpic-register= is useless without -fpic");
2832 /* Prevent the user from choosing an obviously stupid PIC register. */
2833 else if (pic_register < 0 || call_used_regs[pic_register]
2834 || pic_register == HARD_FRAME_POINTER_REGNUM
2835 || pic_register == STACK_POINTER_REGNUM
2836 || pic_register >= PC_REGNUM
2837 || (TARGET_VXWORKS_RTP
2838 && (unsigned int) pic_register != arm_pic_register))
2839 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2840 else
2841 arm_pic_register = pic_register;
2844 if (TARGET_VXWORKS_RTP
2845 && !global_options_set.x_arm_pic_data_is_text_relative)
2846 arm_pic_data_is_text_relative = 0;
2848 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2849 if (fix_cm3_ldrd == 2)
2851 if (arm_selected_cpu->core == cortexm3)
2852 fix_cm3_ldrd = 1;
2853 else
2854 fix_cm3_ldrd = 0;
2857 /* Enable -munaligned-access by default for
2858 - all ARMv6 architecture-based processors
2859 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2860 - ARMv8 architecture-based processors.
2862 Disable -munaligned-access by default for
2863 - all pre-ARMv6 architecture-based processors
2864 - ARMv6-M architecture-based processors. */
2866 if (unaligned_access == 2)
2868 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2869 unaligned_access = 1;
2870 else
2871 unaligned_access = 0;
2873 else if (unaligned_access == 1
2874 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2876 warning (0, "target CPU does not support unaligned accesses");
2877 unaligned_access = 0;
2880 if (TARGET_THUMB1 && flag_schedule_insns)
2882 /* Don't warn since it's on by default in -O2. */
2883 flag_schedule_insns = 0;
2886 if (optimize_size)
2888 /* If optimizing for size, bump the number of instructions that we
2889 are prepared to conditionally execute (even on a StrongARM). */
2890 max_insns_skipped = 6;
2892 else
2893 max_insns_skipped = current_tune->max_insns_skipped;
2895 /* Hot/Cold partitioning is not currently supported, since we can't
2896 handle literal pool placement in that case. */
2897 if (flag_reorder_blocks_and_partition)
2899 inform (input_location,
2900 "-freorder-blocks-and-partition not supported on this architecture");
2901 flag_reorder_blocks_and_partition = 0;
2902 flag_reorder_blocks = 1;
2905 if (flag_pic)
2906 /* Hoisting PIC address calculations more aggressively provides a small,
2907 but measurable, size reduction for PIC code. Therefore, we decrease
2908 the bar for unrestricted expression hoisting to the cost of PIC address
2909 calculation, which is 2 instructions. */
2910 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2911 global_options.x_param_values,
2912 global_options_set.x_param_values);
2914 /* ARM EABI defaults to strict volatile bitfields. */
2915 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2916 && abi_version_at_least(2))
2917 flag_strict_volatile_bitfields = 1;
2919 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
2920 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2921 if (flag_prefetch_loop_arrays < 0
2922 && HAVE_prefetch
2923 && optimize >= 3
2924 && current_tune->num_prefetch_slots > 0)
2925 flag_prefetch_loop_arrays = 1;
2927 /* Set up parameters to be used in prefetching algorithm. Do not override the
2928 defaults unless we are tuning for a core we have researched values for. */
2929 if (current_tune->num_prefetch_slots > 0)
2930 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2931 current_tune->num_prefetch_slots,
2932 global_options.x_param_values,
2933 global_options_set.x_param_values);
2934 if (current_tune->l1_cache_line_size >= 0)
2935 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2936 current_tune->l1_cache_line_size,
2937 global_options.x_param_values,
2938 global_options_set.x_param_values);
2939 if (current_tune->l1_cache_size >= 0)
2940 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2941 current_tune->l1_cache_size,
2942 global_options.x_param_values,
2943 global_options_set.x_param_values);
2945 /* Use Neon rather than core registers to perform 64-bit
2946 operations. */
2947 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2948 if (use_neon_for_64bits == 1)
2949 prefer_neon_for_64bits = true;
2951 /* Use the alternative scheduling-pressure algorithm by default. */
2952 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
2953 global_options.x_param_values,
2954 global_options_set.x_param_values);
2956 /* Disable shrink-wrap when optimizing function for size, since it tends to
2957 generate additional returns. */
2958 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2959 flag_shrink_wrap = false;
2960 /* TBD: Dwarf info for apcs frame is not handled yet. */
2961 if (TARGET_APCS_FRAME)
2962 flag_shrink_wrap = false;
2964 /* We only support -mslow-flash-data on armv7-m targets. */
2965 if (target_slow_flash_data
2966 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2967 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2968 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2970 /* Currently, for slow flash data, we just disable literal pools. */
2971 if (target_slow_flash_data)
2972 arm_disable_literal_pool = true;
2974 /* Register global variables with the garbage collector. */
2975 arm_add_gc_roots ();
2978 static void
2979 arm_add_gc_roots (void)
2981 gcc_obstack_init(&minipool_obstack);
2982 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2985 /* A table of known ARM exception types.
2986 For use with the interrupt function attribute. */
2988 typedef struct
2990 const char *const arg;
2991 const unsigned long return_value;
2993 isr_attribute_arg;
2995 static const isr_attribute_arg isr_attribute_args [] =
2997 { "IRQ", ARM_FT_ISR },
2998 { "irq", ARM_FT_ISR },
2999 { "FIQ", ARM_FT_FIQ },
3000 { "fiq", ARM_FT_FIQ },
3001 { "ABORT", ARM_FT_ISR },
3002 { "abort", ARM_FT_ISR },
3003 { "ABORT", ARM_FT_ISR },
3004 { "abort", ARM_FT_ISR },
3005 { "UNDEF", ARM_FT_EXCEPTION },
3006 { "undef", ARM_FT_EXCEPTION },
3007 { "SWI", ARM_FT_EXCEPTION },
3008 { "swi", ARM_FT_EXCEPTION },
3009 { NULL, ARM_FT_NORMAL }
3012 /* Returns the (interrupt) function type of the current
3013 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3015 static unsigned long
3016 arm_isr_value (tree argument)
3018 const isr_attribute_arg * ptr;
3019 const char * arg;
3021 if (!arm_arch_notm)
3022 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3024 /* No argument - default to IRQ. */
3025 if (argument == NULL_TREE)
3026 return ARM_FT_ISR;
3028 /* Get the value of the argument. */
3029 if (TREE_VALUE (argument) == NULL_TREE
3030 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3031 return ARM_FT_UNKNOWN;
3033 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3035 /* Check it against the list of known arguments. */
3036 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3037 if (streq (arg, ptr->arg))
3038 return ptr->return_value;
3040 /* An unrecognized interrupt type. */
3041 return ARM_FT_UNKNOWN;
3044 /* Computes the type of the current function. */
3046 static unsigned long
3047 arm_compute_func_type (void)
3049 unsigned long type = ARM_FT_UNKNOWN;
3050 tree a;
3051 tree attr;
3053 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3055 /* Decide if the current function is volatile. Such functions
3056 never return, and many memory cycles can be saved by not storing
3057 register values that will never be needed again. This optimization
3058 was added to speed up context switching in a kernel application. */
3059 if (optimize > 0
3060 && (TREE_NOTHROW (current_function_decl)
3061 || !(flag_unwind_tables
3062 || (flag_exceptions
3063 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3064 && TREE_THIS_VOLATILE (current_function_decl))
3065 type |= ARM_FT_VOLATILE;
3067 if (cfun->static_chain_decl != NULL)
3068 type |= ARM_FT_NESTED;
3070 attr = DECL_ATTRIBUTES (current_function_decl);
3072 a = lookup_attribute ("naked", attr);
3073 if (a != NULL_TREE)
3074 type |= ARM_FT_NAKED;
3076 a = lookup_attribute ("isr", attr);
3077 if (a == NULL_TREE)
3078 a = lookup_attribute ("interrupt", attr);
3080 if (a == NULL_TREE)
3081 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3082 else
3083 type |= arm_isr_value (TREE_VALUE (a));
3085 return type;
3088 /* Returns the type of the current function. */
3090 unsigned long
3091 arm_current_func_type (void)
3093 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3094 cfun->machine->func_type = arm_compute_func_type ();
3096 return cfun->machine->func_type;
3099 bool
3100 arm_allocate_stack_slots_for_args (void)
3102 /* Naked functions should not allocate stack slots for arguments. */
3103 return !IS_NAKED (arm_current_func_type ());
3106 static bool
3107 arm_warn_func_return (tree decl)
3109 /* Naked functions are implemented entirely in assembly, including the
3110 return sequence, so suppress warnings about this. */
3111 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3115 /* Output assembler code for a block containing the constant parts
3116 of a trampoline, leaving space for the variable parts.
3118 On the ARM, (if r8 is the static chain regnum, and remembering that
3119 referencing pc adds an offset of 8) the trampoline looks like:
3120 ldr r8, [pc, #0]
3121 ldr pc, [pc]
3122 .word static chain value
3123 .word function's address
3124 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3126 static void
3127 arm_asm_trampoline_template (FILE *f)
3129 if (TARGET_ARM)
3131 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3132 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3134 else if (TARGET_THUMB2)
3136 /* The Thumb-2 trampoline is similar to the arm implementation.
3137 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3138 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3139 STATIC_CHAIN_REGNUM, PC_REGNUM);
3140 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3142 else
3144 ASM_OUTPUT_ALIGN (f, 2);
3145 fprintf (f, "\t.code\t16\n");
3146 fprintf (f, ".Ltrampoline_start:\n");
3147 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3148 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3149 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3150 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3151 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3152 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3154 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3155 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3158 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3160 static void
3161 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3163 rtx fnaddr, mem, a_tramp;
3165 emit_block_move (m_tramp, assemble_trampoline_template (),
3166 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3168 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3169 emit_move_insn (mem, chain_value);
3171 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3172 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3173 emit_move_insn (mem, fnaddr);
3175 a_tramp = XEXP (m_tramp, 0);
3176 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3177 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3178 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
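/* On a 32-bit target the variable words therefore land at offsets 8
   (static chain value) and 12 (function address), matching the two .word
   slots in the template above, after which __clear_cache is invoked over
   the whole TRAMPOLINE_SIZE region.  */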
3181 /* Thumb trampolines should be entered in thumb mode, so set
3182 the bottom bit of the address. */
3184 static rtx
3185 arm_trampoline_adjust_address (rtx addr)
3187 if (TARGET_THUMB)
3188 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3189 NULL, 0, OPTAB_LIB_WIDEN);
3190 return addr;
3193 /* Return 1 if it is possible to return using a single instruction.
3194 If SIBLING is non-null, this is a test for a return before a sibling
3195 call. SIBLING is the call insn, so we can examine its register usage. */
3198 use_return_insn (int iscond, rtx sibling)
3200 int regno;
3201 unsigned int func_type;
3202 unsigned long saved_int_regs;
3203 unsigned HOST_WIDE_INT stack_adjust;
3204 arm_stack_offsets *offsets;
3206 /* Never use a return instruction before reload has run. */
3207 if (!reload_completed)
3208 return 0;
3210 func_type = arm_current_func_type ();
3212 /* Naked, volatile and stack alignment functions need special
3213 consideration. */
3214 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3215 return 0;
3217 /* So do interrupt functions that use the frame pointer and Thumb
3218 interrupt functions. */
3219 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3220 return 0;
3222 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3223 && !optimize_function_for_size_p (cfun))
3224 return 0;
3226 offsets = arm_get_frame_offsets ();
3227 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3229 /* As do variadic functions. */
3230 if (crtl->args.pretend_args_size
3231 || cfun->machine->uses_anonymous_args
3232 /* Or if the function calls __builtin_eh_return () */
3233 || crtl->calls_eh_return
3234 /* Or if the function calls alloca */
3235 || cfun->calls_alloca
3236 /* Or if there is a stack adjustment. However, if the stack pointer
3237 is saved on the stack, we can use a pre-incrementing stack load. */
3238 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3239 && stack_adjust == 4)))
3240 return 0;
3242 saved_int_regs = offsets->saved_regs_mask;
3244 /* Unfortunately, the insn
3246 ldmib sp, {..., sp, ...}
3248 triggers a bug on most SA-110 based devices, such that the stack
3249 pointer won't be correctly restored if the instruction takes a
3250 page fault. We work around this problem by popping r3 along with
3251 the other registers, since that is never slower than executing
3252 another instruction.
3254 We test for !arm_arch5 here, because code for any architecture
3255 less than this could potentially be run on one of the buggy
3256 chips. */
3257 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3259 /* Validate that r3 is a call-clobbered register (always true in
3260 the default abi) ... */
3261 if (!call_used_regs[3])
3262 return 0;
3264 /* ... that it isn't being used for a return value ... */
3265 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3266 return 0;
3268 /* ... or for a tail-call argument ... */
3269 if (sibling)
3271 gcc_assert (CALL_P (sibling));
3273 if (find_regno_fusage (sibling, USE, 3))
3274 return 0;
3277 /* ... and that there are no call-saved registers in r0-r2
3278 (always true in the default ABI). */
3279 if (saved_int_regs & 0x7)
3280 return 0;
3283 /* Can't be done if interworking with Thumb, and any registers have been
3284 stacked. */
3285 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3286 return 0;
3288 /* On StrongARM, conditional returns are expensive if they aren't
3289 taken and multiple registers have been stacked. */
3290 if (iscond && arm_tune_strongarm)
3292 /* Conditional return when just the LR is stored is a simple
3293 conditional-load instruction, that's not expensive. */
3294 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3295 return 0;
3297 if (flag_pic
3298 && arm_pic_register != INVALID_REGNUM
3299 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3300 return 0;
3303 /* If there are saved registers but the LR isn't saved, then we need
3304 two instructions for the return. */
3305 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3306 return 0;
3308 /* Can't be done if any of the VFP regs are pushed,
3309 since this also requires an insn. */
3310 if (TARGET_HARD_FLOAT && TARGET_VFP)
3311 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3312 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3313 return 0;
3315 if (TARGET_REALLY_IWMMXT)
3316 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3317 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3318 return 0;
3320 return 1;
3323 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3324 shrink-wrapping if possible. This is the case if we need to emit a
3325 prologue, which we can test by looking at the offsets. */
3326 bool
3327 use_simple_return_p (void)
3329 arm_stack_offsets *offsets;
3331 offsets = arm_get_frame_offsets ();
3332 return offsets->outgoing_args != 0;
3335 /* Return TRUE if int I is a valid immediate ARM constant. */
3338 const_ok_for_arm (HOST_WIDE_INT i)
3340 int lowbit;
3342 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3343 be all zero, or all one. */
3344 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3345 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3346 != ((~(unsigned HOST_WIDE_INT) 0)
3347 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3348 return FALSE;
3350 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3352 /* Fast return for 0 and small values. We must do this for zero, since
3353 the code below can't handle that one case. */
3354 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3355 return TRUE;
3357 /* Get the number of trailing zeros. */
3358 lowbit = ffs((int) i) - 1;
3360 /* Only even shifts are allowed in ARM mode so round down to the
3361 nearest even number. */
3362 if (TARGET_ARM)
3363 lowbit &= ~1;
3365 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3366 return TRUE;
3368 if (TARGET_ARM)
3370 /* Allow rotated constants in ARM mode. */
3371 if (lowbit <= 4
3372 && ((i & ~0xc000003f) == 0
3373 || (i & ~0xf000000f) == 0
3374 || (i & ~0xfc000003) == 0))
3375 return TRUE;
3377 else
3379 HOST_WIDE_INT v;
3381 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3382 v = i & 0xff;
3383 v |= v << 16;
3384 if (i == v || i == (v | (v << 8)))
3385 return TRUE;
3387 /* Allow repeated pattern 0xXY00XY00. */
3388 v = i & 0xff00;
3389 v |= v << 16;
3390 if (i == v)
3391 return TRUE;
3394 return FALSE;
3397 /* Return true if I is a valid constant for the operation CODE. */
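/* For example, 0xffffff00 is not itself a valid ARM immediate, but
   const_ok_for_op (0xffffff00, AND) is true because its complement 0xff
   is, so the AND can be done with a single BIC. */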
3399 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3401 if (const_ok_for_arm (i))
3402 return 1;
3404 switch (code)
3406 case SET:
3407 /* See if we can use movw. */
3408 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3409 return 1;
3410 else
3411 /* Otherwise, try mvn. */
3412 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3414 case PLUS:
3415 /* See if we can use addw or subw. */
3416 if (TARGET_THUMB2
3417 && ((i & 0xfffff000) == 0
3418 || ((-i) & 0xfffff000) == 0))
3419 return 1;
3420 /* else fall through. */
3422 case COMPARE:
3423 case EQ:
3424 case NE:
3425 case GT:
3426 case LE:
3427 case LT:
3428 case GE:
3429 case GEU:
3430 case LTU:
3431 case GTU:
3432 case LEU:
3433 case UNORDERED:
3434 case ORDERED:
3435 case UNEQ:
3436 case UNGE:
3437 case UNLT:
3438 case UNGT:
3439 case UNLE:
3440 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3442 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3443 case XOR:
3444 return 0;
3446 case IOR:
3447 if (TARGET_THUMB2)
3448 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3449 return 0;
3451 case AND:
3452 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3454 default:
3455 gcc_unreachable ();
3459 /* Return true if I is a valid DImode constant for the operation CODE. */
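/* E.g. a DImode AND, IOR or XOR is allowed when each 32-bit half either
   satisfies const_ok_for_op for that code or is 0xFFFFFFFF, so the
   operation can be split into (at most) two SImode insns. */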
3461 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3463 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3464 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3465 rtx hi = GEN_INT (hi_val);
3466 rtx lo = GEN_INT (lo_val);
3468 if (TARGET_THUMB1)
3469 return 0;
3471 switch (code)
3473 case AND:
3474 case IOR:
3475 case XOR:
3476 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3477 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3478 case PLUS:
3479 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3481 default:
3482 return 0;
3486 /* Emit a sequence of insns to handle a large constant.
3487 CODE is the code of the operation required; it can be any of SET, PLUS,
3488 IOR, AND, XOR, MINUS;
3489 MODE is the mode in which the operation is being performed;
3490 VAL is the integer to operate on;
3491 SOURCE is the other operand (a register, or a null-pointer for SET);
3492 SUBTARGETS means it is safe to create scratch registers if that will
3493 either produce a simpler sequence, or if we will want to CSE the values.
3494 Return value is the number of insns emitted. */
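/* For instance, with CODE == SET and VAL == 0x0000ffff on a core without
   movw/movt, the value might be synthesized in two insns, e.g.
       mov   target, #0xff00
       orr   target, target, #0x00ff
   (the exact sequence chosen depends on the cost checks below). */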
3496 /* ??? Tweak this for thumb2. */
3498 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3499 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3501 rtx cond;
3503 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3504 cond = COND_EXEC_TEST (PATTERN (insn));
3505 else
3506 cond = NULL_RTX;
3508 if (subtargets || code == SET
3509 || (REG_P (target) && REG_P (source)
3510 && REGNO (target) != REGNO (source)))
3512 /* After arm_reorg has been called, we can't fix up expensive
3513 constants by pushing them into memory so we must synthesize
3514 them in-line, regardless of the cost. This is only likely to
3515 be more costly on chips that have load delay slots and we are
3516 compiling without running the scheduler (so no splitting
3517 occurred before the final instruction emission).
3519 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3521 if (!cfun->machine->after_arm_reorg
3522 && !cond
3523 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3524 1, 0)
3525 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3526 + (code != SET))))
3528 if (code == SET)
3530 /* Currently SET is the only monadic value for CODE; all
3531 the rest are dyadic. */
3532 if (TARGET_USE_MOVT)
3533 arm_emit_movpair (target, GEN_INT (val));
3534 else
3535 emit_set_insn (target, GEN_INT (val));
3537 return 1;
3539 else
3541 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3543 if (TARGET_USE_MOVT)
3544 arm_emit_movpair (temp, GEN_INT (val));
3545 else
3546 emit_set_insn (temp, GEN_INT (val));
3548 /* For MINUS, the value is subtracted from, since we never
3549 have subtraction of a constant. */
3550 if (code == MINUS)
3551 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3552 else
3553 emit_set_insn (target,
3554 gen_rtx_fmt_ee (code, mode, source, temp));
3555 return 2;
3560 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3564 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3565 ARM/THUMB2 immediates, and add up to VAL.
3566 The function's return value gives the number of insns required. */
3567 static int
3568 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3569 struct four_ints *return_sequence)
3571 int best_consecutive_zeros = 0;
3572 int i;
3573 int best_start = 0;
3574 int insns1, insns2;
3575 struct four_ints tmp_sequence;
3577 /* If we aren't targeting ARM, the best place to start is always at
3578 the bottom, otherwise look more closely. */
3579 if (TARGET_ARM)
3581 for (i = 0; i < 32; i += 2)
3583 int consecutive_zeros = 0;
3585 if (!(val & (3 << i)))
3587 while ((i < 32) && !(val & (3 << i)))
3589 consecutive_zeros += 2;
3590 i += 2;
3592 if (consecutive_zeros > best_consecutive_zeros)
3594 best_consecutive_zeros = consecutive_zeros;
3595 best_start = i - consecutive_zeros;
3597 i -= 2;
3602 /* So long as it won't require any more insns to do so, it's
3603 desirable to emit a small constant (in bits 0...9) in the last
3604 insn. This way there is more chance that it can be combined with
3605 a later addressing insn to form a pre-indexed load or store
3606 operation. Consider:
3608 *((volatile int *)0xe0000100) = 1;
3609 *((volatile int *)0xe0000110) = 2;
3611 We want this to wind up as:
3613 mov rA, #0xe0000000
3614 mov rB, #1
3615 str rB, [rA, #0x100]
3616 mov rB, #2
3617 str rB, [rA, #0x110]
3619 rather than having to synthesize both large constants from scratch.
3621 Therefore, we calculate how many insns would be required to emit
3622 the constant starting from `best_start', and also starting from
3623 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3624 yield a shorter sequence, we may as well use zero. */
3625 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3626 if (best_start != 0
3627 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3629 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3630 if (insns2 <= insns1)
3632 *return_sequence = tmp_sequence;
3633 insns1 = insns2;
3637 return insns1;
3640 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3641 static int
3642 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3643 struct four_ints *return_sequence, int i)
3645 int remainder = val & 0xffffffff;
3646 int insns = 0;
3648 /* Try and find a way of doing the job in either two or three
3649 instructions.
3651 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3652 location. We start at position I. This may be the MSB, or
3653 optimal_immediate_sequence may have positioned it at the largest block
3654 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3655 wrapping around to the top of the word when we drop off the bottom.
3656 In the worst case this code should produce no more than four insns.
3658 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3659 constants, shifted to any arbitrary location. We should always start
3660 at the MSB. */
3663 int end;
3664 unsigned int b1, b2, b3, b4;
3665 unsigned HOST_WIDE_INT result;
3666 int loc;
3668 gcc_assert (insns < 4);
3670 if (i <= 0)
3671 i += 32;
3673 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3674 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3676 loc = i;
3677 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3678 /* We can use addw/subw for the last 12 bits. */
3679 result = remainder;
3680 else
3682 /* Use an 8-bit shifted/rotated immediate. */
3683 end = i - 8;
3684 if (end < 0)
3685 end += 32;
3686 result = remainder & ((0x0ff << end)
3687 | ((i < end) ? (0xff >> (32 - end))
3688 : 0));
3689 i -= 8;
3692 else
3694 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3695 arbitrary shifts. */
3696 i -= TARGET_ARM ? 2 : 1;
3697 continue;
3700 /* Next, see if we can do a better job with a thumb2 replicated
3701 constant.
3703 We do it this way around to catch the cases like 0x01F001E0 where
3704 two 8-bit immediates would work, but a replicated constant would
3705 make it worse.
3707 TODO: 16-bit constants that don't clear all the bits, but still win.
3708 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3709 if (TARGET_THUMB2)
3711 b1 = (remainder & 0xff000000) >> 24;
3712 b2 = (remainder & 0x00ff0000) >> 16;
3713 b3 = (remainder & 0x0000ff00) >> 8;
3714 b4 = remainder & 0xff;
3716 if (loc > 24)
3718 /* The 8-bit immediate already found clears b1 (and maybe b2),
3719 but must leave b3 and b4 alone. */
3721 /* First try to find a 32-bit replicated constant that clears
3722 almost everything. We can assume that we can't do it in one,
3723 or else we wouldn't be here. */
3724 unsigned int tmp = b1 & b2 & b3 & b4;
3725 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3726 + (tmp << 24);
3727 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3728 + (tmp == b3) + (tmp == b4);
3729 if (tmp
3730 && (matching_bytes >= 3
3731 || (matching_bytes == 2
3732 && const_ok_for_op (remainder & ~tmp2, code))))
3734 /* At least 3 of the bytes match, and the fourth has at
3735 least as many bits set, or two of the bytes match
3736 and it will only require one more insn to finish. */
3737 result = tmp2;
3738 i = tmp != b1 ? 32
3739 : tmp != b2 ? 24
3740 : tmp != b3 ? 16
3741 : 8;
3744 /* Second, try to find a 16-bit replicated constant that can
3745 leave three of the bytes clear. If b2 or b4 is already
3746 zero, then we can. If the 8-bit from above would not
3747 clear b2 anyway, then we still win. */
3748 else if (b1 == b3 && (!b2 || !b4
3749 || (remainder & 0x00ff0000 & ~result)))
3751 result = remainder & 0xff00ff00;
3752 i = 24;
3755 else if (loc > 16)
3757 /* The 8-bit immediate already found clears b2 (and maybe b3)
3758 and we don't get here unless b1 is already clear, but it will
3759 leave b4 unchanged. */
3761 /* If we can clear b2 and b4 at once, then we win, since the
3762 8-bits couldn't possibly reach that far. */
3763 if (b2 == b4)
3765 result = remainder & 0x00ff00ff;
3766 i = 16;
3771 return_sequence->i[insns++] = result;
3772 remainder &= ~result;
3774 if (code == SET || code == MINUS)
3775 code = PLUS;
3777 while (remainder);
3779 return insns;
3782 /* Emit an instruction with the indicated PATTERN. If COND is
3783 non-NULL, conditionalize the execution of the instruction on COND
3784 being true. */
3786 static void
3787 emit_constant_insn (rtx cond, rtx pattern)
3789 if (cond)
3790 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3791 emit_insn (pattern);
3794 /* As above, but extra parameter GENERATE which, if clear, suppresses
3795 RTL generation. */
3797 static int
3798 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3799 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3800 int generate)
3802 int can_invert = 0;
3803 int can_negate = 0;
3804 int final_invert = 0;
3805 int i;
3806 int set_sign_bit_copies = 0;
3807 int clear_sign_bit_copies = 0;
3808 int clear_zero_bit_copies = 0;
3809 int set_zero_bit_copies = 0;
3810 int insns = 0, neg_insns, inv_insns;
3811 unsigned HOST_WIDE_INT temp1, temp2;
3812 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3813 struct four_ints *immediates;
3814 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3816 /* Find out which operations are safe for a given CODE. Also do a quick
3817 check for degenerate cases; these can occur when DImode operations
3818 are split. */
3819 switch (code)
3821 case SET:
3822 can_invert = 1;
3823 break;
3825 case PLUS:
3826 can_negate = 1;
3827 break;
3829 case IOR:
3830 if (remainder == 0xffffffff)
3832 if (generate)
3833 emit_constant_insn (cond,
3834 gen_rtx_SET (VOIDmode, target,
3835 GEN_INT (ARM_SIGN_EXTEND (val))));
3836 return 1;
3839 if (remainder == 0)
3841 if (reload_completed && rtx_equal_p (target, source))
3842 return 0;
3844 if (generate)
3845 emit_constant_insn (cond,
3846 gen_rtx_SET (VOIDmode, target, source));
3847 return 1;
3849 break;
3851 case AND:
3852 if (remainder == 0)
3854 if (generate)
3855 emit_constant_insn (cond,
3856 gen_rtx_SET (VOIDmode, target, const0_rtx));
3857 return 1;
3859 if (remainder == 0xffffffff)
3861 if (reload_completed && rtx_equal_p (target, source))
3862 return 0;
3863 if (generate)
3864 emit_constant_insn (cond,
3865 gen_rtx_SET (VOIDmode, target, source));
3866 return 1;
3868 can_invert = 1;
3869 break;
3871 case XOR:
3872 if (remainder == 0)
3874 if (reload_completed && rtx_equal_p (target, source))
3875 return 0;
3876 if (generate)
3877 emit_constant_insn (cond,
3878 gen_rtx_SET (VOIDmode, target, source));
3879 return 1;
3882 if (remainder == 0xffffffff)
3884 if (generate)
3885 emit_constant_insn (cond,
3886 gen_rtx_SET (VOIDmode, target,
3887 gen_rtx_NOT (mode, source)));
3888 return 1;
3890 final_invert = 1;
3891 break;
3893 case MINUS:
3894 /* We treat MINUS as (val - source), since (source - val) is always
3895 passed as (source + (-val)). */
3896 if (remainder == 0)
3898 if (generate)
3899 emit_constant_insn (cond,
3900 gen_rtx_SET (VOIDmode, target,
3901 gen_rtx_NEG (mode, source)));
3902 return 1;
3904 if (const_ok_for_arm (val))
3906 if (generate)
3907 emit_constant_insn (cond,
3908 gen_rtx_SET (VOIDmode, target,
3909 gen_rtx_MINUS (mode, GEN_INT (val),
3910 source)));
3911 return 1;
3914 break;
3916 default:
3917 gcc_unreachable ();
3920 /* If we can do it in one insn get out quickly. */
3921 if (const_ok_for_op (val, code))
3923 if (generate)
3924 emit_constant_insn (cond,
3925 gen_rtx_SET (VOIDmode, target,
3926 (source
3927 ? gen_rtx_fmt_ee (code, mode, source,
3928 GEN_INT (val))
3929 : GEN_INT (val))));
3930 return 1;
3933 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3934 insn. */
3935 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3936 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3938 if (generate)
3940 if (mode == SImode && i == 16)
3941 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3942 smaller insn. */
3943 emit_constant_insn (cond,
3944 gen_zero_extendhisi2
3945 (target, gen_lowpart (HImode, source)));
3946 else
3947 /* extzv only supports SImode, but we can coerce the operands
3948 into that mode. */
3949 emit_constant_insn (cond,
3950 gen_extzv_t2 (gen_lowpart (SImode, target),
3951 gen_lowpart (SImode, source),
3952 GEN_INT (i), const0_rtx));
3955 return 1;
3958 /* Calculate a few attributes that may be useful for specific
3959 optimizations. */
3960 /* Count number of leading zeros. */
3961 for (i = 31; i >= 0; i--)
3963 if ((remainder & (1 << i)) == 0)
3964 clear_sign_bit_copies++;
3965 else
3966 break;
3969 /* Count number of leading 1's. */
3970 for (i = 31; i >= 0; i--)
3972 if ((remainder & (1 << i)) != 0)
3973 set_sign_bit_copies++;
3974 else
3975 break;
3978 /* Count number of trailing zeros. */
3979 for (i = 0; i <= 31; i++)
3981 if ((remainder & (1 << i)) == 0)
3982 clear_zero_bit_copies++;
3983 else
3984 break;
3987 /* Count number of trailing 1's. */
3988 for (i = 0; i <= 31; i++)
3990 if ((remainder & (1 << i)) != 0)
3991 set_zero_bit_copies++;
3992 else
3993 break;
3996 switch (code)
3998 case SET:
3999 /* See if we can do this by sign_extending a constant that is known
4000 to be negative. This is a good way of doing it, since the shift
4001 may well merge into a subsequent insn. */
4002 if (set_sign_bit_copies > 1)
4004 if (const_ok_for_arm
4005 (temp1 = ARM_SIGN_EXTEND (remainder
4006 << (set_sign_bit_copies - 1))))
4008 if (generate)
4010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4011 emit_constant_insn (cond,
4012 gen_rtx_SET (VOIDmode, new_src,
4013 GEN_INT (temp1)));
4014 emit_constant_insn (cond,
4015 gen_ashrsi3 (target, new_src,
4016 GEN_INT (set_sign_bit_copies - 1)));
4018 return 2;
4020 /* For an inverted constant, we will need to set the low bits,
4021 these will be shifted out of harm's way. */
4022 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4023 if (const_ok_for_arm (~temp1))
4025 if (generate)
4027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4028 emit_constant_insn (cond,
4029 gen_rtx_SET (VOIDmode, new_src,
4030 GEN_INT (temp1)));
4031 emit_constant_insn (cond,
4032 gen_ashrsi3 (target, new_src,
4033 GEN_INT (set_sign_bit_copies - 1)));
4035 return 2;
4039 /* See if we can calculate the value as the difference between two
4040 valid immediates. */
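/* E.g. 0x000fffff is not a valid immediate, but it can be built as
   0x00100000 - 1, i.e. a mov of #0x100000 followed by a subtract of #1
   (emitted below as an add of -temp2). */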
4041 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4043 int topshift = clear_sign_bit_copies & ~1;
4045 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4046 & (0xff000000 >> topshift));
4048 /* If temp1 is zero, then that means the 9 most significant
4049 bits of remainder were 1 and we've caused it to overflow.
4050 When topshift is 0 we don't need to do anything since we
4051 can borrow from 'bit 32'. */
4052 if (temp1 == 0 && topshift != 0)
4053 temp1 = 0x80000000 >> (topshift - 1);
4055 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4057 if (const_ok_for_arm (temp2))
4059 if (generate)
4061 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4062 emit_constant_insn (cond,
4063 gen_rtx_SET (VOIDmode, new_src,
4064 GEN_INT (temp1)));
4065 emit_constant_insn (cond,
4066 gen_addsi3 (target, new_src,
4067 GEN_INT (-temp2)));
4070 return 2;
4074 /* See if we can generate this by setting the bottom (or the top)
4075 16 bits, and then shifting these into the other half of the
4076 word. We only look for the simplest cases, to do more would cost
4077 too much. Be careful, however, not to generate this when the
4078 alternative would take fewer insns. */
4079 if (val & 0xffff0000)
4081 temp1 = remainder & 0xffff0000;
4082 temp2 = remainder & 0x0000ffff;
4084 /* Overlaps outside this range are best done using other methods. */
4085 for (i = 9; i < 24; i++)
4087 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4088 && !const_ok_for_arm (temp2))
4090 rtx new_src = (subtargets
4091 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4092 : target);
4093 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4094 source, subtargets, generate);
4095 source = new_src;
4096 if (generate)
4097 emit_constant_insn
4098 (cond,
4099 gen_rtx_SET
4100 (VOIDmode, target,
4101 gen_rtx_IOR (mode,
4102 gen_rtx_ASHIFT (mode, source,
4103 GEN_INT (i)),
4104 source)));
4105 return insns + 1;
4109 /* Don't duplicate cases already considered. */
4110 for (i = 17; i < 24; i++)
4112 if (((temp1 | (temp1 >> i)) == remainder)
4113 && !const_ok_for_arm (temp1))
4115 rtx new_src = (subtargets
4116 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4117 : target);
4118 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4119 source, subtargets, generate);
4120 source = new_src;
4121 if (generate)
4122 emit_constant_insn
4123 (cond,
4124 gen_rtx_SET (VOIDmode, target,
4125 gen_rtx_IOR
4126 (mode,
4127 gen_rtx_LSHIFTRT (mode, source,
4128 GEN_INT (i)),
4129 source)));
4130 return insns + 1;
4134 break;
4136 case IOR:
4137 case XOR:
4138 /* If we have IOR or XOR, and the constant can be loaded in a
4139 single instruction, and we can find a temporary to put it in,
4140 then this can be done in two instructions instead of 3-4. */
4141 if (subtargets
4142 /* TARGET can't be NULL if SUBTARGETS is 0 */
4143 || (reload_completed && !reg_mentioned_p (target, source)))
4145 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4147 if (generate)
4149 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4151 emit_constant_insn (cond,
4152 gen_rtx_SET (VOIDmode, sub,
4153 GEN_INT (val)));
4154 emit_constant_insn (cond,
4155 gen_rtx_SET (VOIDmode, target,
4156 gen_rtx_fmt_ee (code, mode,
4157 source, sub)));
4159 return 2;
4163 if (code == XOR)
4164 break;
4166 /* Convert
4167 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4168 followed by 0s, e.g. 0xfff00000) into
4169 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4171 This can be done in 2 instructions by using shifts with mov or mvn.
4172 E.g. for
4173 x = x | 0xfff00000;
4174 we generate:
4175 mvn r0, r0, asl #12
4176 mvn r0, r0, lsr #12 */
4177 if (set_sign_bit_copies > 8
4178 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4180 if (generate)
4182 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4183 rtx shift = GEN_INT (set_sign_bit_copies);
4185 emit_constant_insn
4186 (cond,
4187 gen_rtx_SET (VOIDmode, sub,
4188 gen_rtx_NOT (mode,
4189 gen_rtx_ASHIFT (mode,
4190 source,
4191 shift))));
4192 emit_constant_insn
4193 (cond,
4194 gen_rtx_SET (VOIDmode, target,
4195 gen_rtx_NOT (mode,
4196 gen_rtx_LSHIFTRT (mode, sub,
4197 shift))));
4199 return 2;
4202 /* Convert
4203 x = y | constant (which has set_zero_bit_copies trailing ones) into
4205 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4207 E.g. for r0 = r0 | 0xfff we generate
4208 mvn r0, r0, lsr #12
4209 mvn r0, r0, asl #12
4212 if (set_zero_bit_copies > 8
4213 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4215 if (generate)
4217 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4218 rtx shift = GEN_INT (set_zero_bit_copies);
4220 emit_constant_insn
4221 (cond,
4222 gen_rtx_SET (VOIDmode, sub,
4223 gen_rtx_NOT (mode,
4224 gen_rtx_LSHIFTRT (mode,
4225 source,
4226 shift))));
4227 emit_constant_insn
4228 (cond,
4229 gen_rtx_SET (VOIDmode, target,
4230 gen_rtx_NOT (mode,
4231 gen_rtx_ASHIFT (mode, sub,
4232 shift))));
4234 return 2;
4237 /* This will never be reached for Thumb2 because orn is a valid
4238 instruction. This is for Thumb1 and the ARM 32 bit cases.
4240 x = y | constant (such that ~constant is a valid constant)
4241 Transform this to
4242 x = ~(~y & ~constant).
4244 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4246 if (generate)
4248 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4249 emit_constant_insn (cond,
4250 gen_rtx_SET (VOIDmode, sub,
4251 gen_rtx_NOT (mode, source)));
4252 source = sub;
4253 if (subtargets)
4254 sub = gen_reg_rtx (mode);
4255 emit_constant_insn (cond,
4256 gen_rtx_SET (VOIDmode, sub,
4257 gen_rtx_AND (mode, source,
4258 GEN_INT (temp1))));
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, target,
4261 gen_rtx_NOT (mode, sub)));
4263 return 3;
4265 break;
4267 case AND:
4268 /* See if two shifts will do 2 or more insns' worth of work. */
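/* E.g. x & 0x3fff (not a valid immediate) can be done as a left shift by
   18 followed by a logical right shift by 18, keeping only the low 14
   bits, on cores where the single-insn ubfx path above does not apply. */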
4269 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4271 HOST_WIDE_INT shift_mask = ((0xffffffff
4272 << (32 - clear_sign_bit_copies))
4273 & 0xffffffff);
4275 if ((remainder | shift_mask) != 0xffffffff)
4277 if (generate)
4279 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4280 insns = arm_gen_constant (AND, mode, cond,
4281 remainder | shift_mask,
4282 new_src, source, subtargets, 1);
4283 source = new_src;
4285 else
4287 rtx targ = subtargets ? NULL_RTX : target;
4288 insns = arm_gen_constant (AND, mode, cond,
4289 remainder | shift_mask,
4290 targ, source, subtargets, 0);
4294 if (generate)
4296 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4297 rtx shift = GEN_INT (clear_sign_bit_copies);
4299 emit_insn (gen_ashlsi3 (new_src, source, shift));
4300 emit_insn (gen_lshrsi3 (target, new_src, shift));
4303 return insns + 2;
4306 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4308 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4310 if ((remainder | shift_mask) != 0xffffffff)
4312 if (generate)
4314 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4316 insns = arm_gen_constant (AND, mode, cond,
4317 remainder | shift_mask,
4318 new_src, source, subtargets, 1);
4319 source = new_src;
4321 else
4323 rtx targ = subtargets ? NULL_RTX : target;
4325 insns = arm_gen_constant (AND, mode, cond,
4326 remainder | shift_mask,
4327 targ, source, subtargets, 0);
4331 if (generate)
4333 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4334 rtx shift = GEN_INT (clear_zero_bit_copies);
4336 emit_insn (gen_lshrsi3 (new_src, source, shift));
4337 emit_insn (gen_ashlsi3 (target, new_src, shift));
4340 return insns + 2;
4343 break;
4345 default:
4346 break;
4349 /* Calculate what the instruction sequences would be if we generated it
4350 normally, negated, or inverted. */
4351 if (code == AND)
4352 /* AND cannot be split into multiple insns, so invert and use BIC. */
4353 insns = 99;
4354 else
4355 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4357 if (can_negate)
4358 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4359 &neg_immediates);
4360 else
4361 neg_insns = 99;
4363 if (can_invert || final_invert)
4364 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4365 &inv_immediates);
4366 else
4367 inv_insns = 99;
4369 immediates = &pos_immediates;
4371 /* Is the negated immediate sequence more efficient? */
4372 if (neg_insns < insns && neg_insns <= inv_insns)
4374 insns = neg_insns;
4375 immediates = &neg_immediates;
4377 else
4378 can_negate = 0;
4380 /* Is the inverted immediate sequence more efficient?
4381 We must allow for an extra NOT instruction for XOR operations, although
4382 there is some chance that the final 'mvn' will get optimized later. */
4383 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4385 insns = inv_insns;
4386 immediates = &inv_immediates;
4388 else
4390 can_invert = 0;
4391 final_invert = 0;
4394 /* Now output the chosen sequence as instructions. */
4395 if (generate)
4397 for (i = 0; i < insns; i++)
4399 rtx new_src, temp1_rtx;
4401 temp1 = immediates->i[i];
4403 if (code == SET || code == MINUS)
4404 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4405 else if ((final_invert || i < (insns - 1)) && subtargets)
4406 new_src = gen_reg_rtx (mode);
4407 else
4408 new_src = target;
4410 if (can_invert)
4411 temp1 = ~temp1;
4412 else if (can_negate)
4413 temp1 = -temp1;
4415 temp1 = trunc_int_for_mode (temp1, mode);
4416 temp1_rtx = GEN_INT (temp1);
4418 if (code == SET)
4420 else if (code == MINUS)
4421 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4422 else
4423 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4425 emit_constant_insn (cond,
4426 gen_rtx_SET (VOIDmode, new_src,
4427 temp1_rtx));
4428 source = new_src;
4430 if (code == SET)
4432 can_negate = can_invert;
4433 can_invert = 0;
4434 code = PLUS;
4436 else if (code == MINUS)
4437 code = PLUS;
4441 if (final_invert)
4443 if (generate)
4444 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4445 gen_rtx_NOT (mode, source)));
4446 insns++;
4449 return insns;
4452 /* Canonicalize a comparison so that we are more likely to recognize it.
4453 This can be done for a few constant compares, where we can make the
4454 immediate value easier to load. */
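/* E.g. (GT x 0xfff) is rewritten as (GE x 0x1000): 0xfff cannot be encoded
   as an ARM immediate but 0x1000 can, and the two comparisons are
   equivalent for integer operands. */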
4456 static void
4457 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4458 bool op0_preserve_value)
4460 enum machine_mode mode;
4461 unsigned HOST_WIDE_INT i, maxval;
4463 mode = GET_MODE (*op0);
4464 if (mode == VOIDmode)
4465 mode = GET_MODE (*op1);
4467 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4469 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4470 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4471 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4472 for GTU/LEU in Thumb mode. */
4473 if (mode == DImode)
4475 rtx tem;
4477 if (*code == GT || *code == LE
4478 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4480 /* Missing comparison. First try to use an available
4481 comparison. */
4482 if (CONST_INT_P (*op1))
4484 i = INTVAL (*op1);
4485 switch (*code)
4487 case GT:
4488 case LE:
4489 if (i != maxval
4490 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4492 *op1 = GEN_INT (i + 1);
4493 *code = *code == GT ? GE : LT;
4494 return;
4496 break;
4497 case GTU:
4498 case LEU:
4499 if (i != ~((unsigned HOST_WIDE_INT) 0)
4500 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4502 *op1 = GEN_INT (i + 1);
4503 *code = *code == GTU ? GEU : LTU;
4504 return;
4506 break;
4507 default:
4508 gcc_unreachable ();
4512 /* If that did not work, reverse the condition. */
4513 if (!op0_preserve_value)
4515 tem = *op0;
4516 *op0 = *op1;
4517 *op1 = tem;
4518 *code = (int)swap_condition ((enum rtx_code)*code);
4521 return;
4524 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4525 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4526 to facilitate possible combining with a cmp into 'ands'. */
4527 if (mode == SImode
4528 && GET_CODE (*op0) == ZERO_EXTEND
4529 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4530 && GET_MODE (XEXP (*op0, 0)) == QImode
4531 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4532 && subreg_lowpart_p (XEXP (*op0, 0))
4533 && *op1 == const0_rtx)
4534 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4535 GEN_INT (255));
4537 /* Comparisons smaller than DImode. Only adjust comparisons against
4538 an out-of-range constant. */
4539 if (!CONST_INT_P (*op1)
4540 || const_ok_for_arm (INTVAL (*op1))
4541 || const_ok_for_arm (- INTVAL (*op1)))
4542 return;
4544 i = INTVAL (*op1);
4546 switch (*code)
4548 case EQ:
4549 case NE:
4550 return;
4552 case GT:
4553 case LE:
4554 if (i != maxval
4555 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4557 *op1 = GEN_INT (i + 1);
4558 *code = *code == GT ? GE : LT;
4559 return;
4561 break;
4563 case GE:
4564 case LT:
4565 if (i != ~maxval
4566 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4568 *op1 = GEN_INT (i - 1);
4569 *code = *code == GE ? GT : LE;
4570 return;
4572 break;
4574 case GTU:
4575 case LEU:
4576 if (i != ~((unsigned HOST_WIDE_INT) 0)
4577 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4579 *op1 = GEN_INT (i + 1);
4580 *code = *code == GTU ? GEU : LTU;
4581 return;
4583 break;
4585 case GEU:
4586 case LTU:
4587 if (i != 0
4588 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4590 *op1 = GEN_INT (i - 1);
4591 *code = *code == GEU ? GTU : LEU;
4592 return;
4594 break;
4596 default:
4597 gcc_unreachable ();
4602 /* Define how to find the value returned by a function. */
4604 static rtx
4605 arm_function_value(const_tree type, const_tree func,
4606 bool outgoing ATTRIBUTE_UNUSED)
4608 enum machine_mode mode;
4609 int unsignedp ATTRIBUTE_UNUSED;
4610 rtx r ATTRIBUTE_UNUSED;
4612 mode = TYPE_MODE (type);
4614 if (TARGET_AAPCS_BASED)
4615 return aapcs_allocate_return_reg (mode, type, func);
4617 /* Promote integer types. */
4618 if (INTEGRAL_TYPE_P (type))
4619 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4621 /* Promote small structs returned in a register to full-word size
4622 for big-endian AAPCS. */
4623 if (arm_return_in_msb (type))
4625 HOST_WIDE_INT size = int_size_in_bytes (type);
4626 if (size % UNITS_PER_WORD != 0)
4628 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4629 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4633 return arm_libcall_value_1 (mode);
4636 /* libcall hashtable helpers. */
4638 struct libcall_hasher : typed_noop_remove <rtx_def>
4640 typedef rtx_def value_type;
4641 typedef rtx_def compare_type;
4642 static inline hashval_t hash (const value_type *);
4643 static inline bool equal (const value_type *, const compare_type *);
4644 static inline void remove (value_type *);
4647 inline bool
4648 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4650 return rtx_equal_p (p1, p2);
4653 inline hashval_t
4654 libcall_hasher::hash (const value_type *p1)
4656 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4659 typedef hash_table<libcall_hasher> libcall_table_type;
4661 static void
4662 add_libcall (libcall_table_type *htab, rtx libcall)
4664 *htab->find_slot (libcall, INSERT) = libcall;
4667 static bool
4668 arm_libcall_uses_aapcs_base (const_rtx libcall)
4670 static bool init_done = false;
4671 static libcall_table_type *libcall_htab = NULL;
4673 if (!init_done)
4675 init_done = true;
4677 libcall_htab = new libcall_table_type (31);
4678 add_libcall (libcall_htab,
4679 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4680 add_libcall (libcall_htab,
4681 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4682 add_libcall (libcall_htab,
4683 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4684 add_libcall (libcall_htab,
4685 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4687 add_libcall (libcall_htab,
4688 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4689 add_libcall (libcall_htab,
4690 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4691 add_libcall (libcall_htab,
4692 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4693 add_libcall (libcall_htab,
4694 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4696 add_libcall (libcall_htab,
4697 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4698 add_libcall (libcall_htab,
4699 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4700 add_libcall (libcall_htab,
4701 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4702 add_libcall (libcall_htab,
4703 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4704 add_libcall (libcall_htab,
4705 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4706 add_libcall (libcall_htab,
4707 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4708 add_libcall (libcall_htab,
4709 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4710 add_libcall (libcall_htab,
4711 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4713 /* Values from double-precision helper functions are returned in core
4714 registers if the selected core only supports single-precision
4715 arithmetic, even if we are using the hard-float ABI. The same is
4716 true for single-precision helpers, but we will never be using the
4717 hard-float ABI on a CPU which doesn't support single-precision
4718 operations in hardware. */
4719 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4720 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4721 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4725 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4726 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4727 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4728 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4729 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4730 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4731 SFmode));
4732 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4733 DFmode));
4736 return libcall && libcall_htab->find (libcall) != NULL;
4739 static rtx
4740 arm_libcall_value_1 (enum machine_mode mode)
4742 if (TARGET_AAPCS_BASED)
4743 return aapcs_libcall_value (mode);
4744 else if (TARGET_IWMMXT_ABI
4745 && arm_vector_mode_supported_p (mode))
4746 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4747 else
4748 return gen_rtx_REG (mode, ARG_REGISTER (1));
4751 /* Define how to find the value returned by a library function
4752 assuming the value has mode MODE. */
4754 static rtx
4755 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4757 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4758 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4760 /* The following libcalls return their result in integer registers,
4761 even though they return a floating point value. */
4762 if (arm_libcall_uses_aapcs_base (libcall))
4763 return gen_rtx_REG (mode, ARG_REGISTER(1));
4767 return arm_libcall_value_1 (mode);
4770 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4772 static bool
4773 arm_function_value_regno_p (const unsigned int regno)
4775 if (regno == ARG_REGISTER (1)
4776 || (TARGET_32BIT
4777 && TARGET_AAPCS_BASED
4778 && TARGET_VFP
4779 && TARGET_HARD_FLOAT
4780 && regno == FIRST_VFP_REGNUM)
4781 || (TARGET_IWMMXT_ABI
4782 && regno == FIRST_IWMMXT_REGNUM))
4783 return true;
4785 return false;
4788 /* Determine the amount of memory needed to store the possible return
4789 registers of an untyped call. */
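/* E.g. a 32-bit hard-float VFP target starts from 16 bytes for the core
   registers and adds 32 for the VFP return registers; an iWMMXt ABI adds
   a further 8. */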
4791 arm_apply_result_size (void)
4793 int size = 16;
4795 if (TARGET_32BIT)
4797 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4798 size += 32;
4799 if (TARGET_IWMMXT_ABI)
4800 size += 8;
4803 return size;
4806 /* Decide whether TYPE should be returned in memory (true)
4807 or in a register (false). FNTYPE is the type of the function making
4808 the call. */
4809 static bool
4810 arm_return_in_memory (const_tree type, const_tree fntype)
4812 HOST_WIDE_INT size;
4814 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4816 if (TARGET_AAPCS_BASED)
4818 /* Simple, non-aggregate types (i.e. not including vectors and
4819 complex) are always returned in a register (or registers).
4820 We don't care about which register here, so we can short-cut
4821 some of the detail. */
4822 if (!AGGREGATE_TYPE_P (type)
4823 && TREE_CODE (type) != VECTOR_TYPE
4824 && TREE_CODE (type) != COMPLEX_TYPE)
4825 return false;
4827 /* Any return value that is no larger than one word can be
4828 returned in r0. */
4829 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4830 return false;
4832 /* Check any available co-processors to see if they accept the
4833 type as a register candidate (VFP, for example, can return
4834 some aggregates in consecutive registers). These aren't
4835 available if the call is variadic. */
4836 if (aapcs_select_return_coproc (type, fntype) >= 0)
4837 return false;
4839 /* Vector values should be returned using ARM registers, not
4840 memory (unless they're over 16 bytes, which will break since
4841 we only have four call-clobbered registers to play with). */
4842 if (TREE_CODE (type) == VECTOR_TYPE)
4843 return (size < 0 || size > (4 * UNITS_PER_WORD));
4845 /* The rest go in memory. */
4846 return true;
4849 if (TREE_CODE (type) == VECTOR_TYPE)
4850 return (size < 0 || size > (4 * UNITS_PER_WORD));
4852 if (!AGGREGATE_TYPE_P (type) &&
4853 (TREE_CODE (type) != VECTOR_TYPE))
4854 /* All simple types are returned in registers. */
4855 return false;
4857 if (arm_abi != ARM_ABI_APCS)
4859 /* ATPCS and later return aggregate types in memory only if they are
4860 larger than a word (or are variable size). */
4861 return (size < 0 || size > UNITS_PER_WORD);
4864 /* For the arm-wince targets we choose to be compatible with Microsoft's
4865 ARM and Thumb compilers, which always return aggregates in memory. */
4866 #ifndef ARM_WINCE
4867 /* All structures/unions bigger than one word are returned in memory.
4868 Also catch the case where int_size_in_bytes returns -1. In this case
4869 the aggregate is either huge or of variable size, and in either case
4870 we will want to return it via memory and not in a register. */
4871 if (size < 0 || size > UNITS_PER_WORD)
4872 return true;
4874 if (TREE_CODE (type) == RECORD_TYPE)
4876 tree field;
4878 /* For a struct the APCS says that we only return in a register
4879 if the type is 'integer like' and every addressable element
4880 has an offset of zero. For practical purposes this means
4881 that the structure can have at most one non bit-field element
4882 and that this element must be the first one in the structure. */
4884 /* Find the first field, ignoring non FIELD_DECL things which will
4885 have been created by C++. */
4886 for (field = TYPE_FIELDS (type);
4887 field && TREE_CODE (field) != FIELD_DECL;
4888 field = DECL_CHAIN (field))
4889 continue;
4891 if (field == NULL)
4892 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4894 /* Check that the first field is valid for returning in a register. */
4896 /* ... Floats are not allowed */
4897 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4898 return true;
4900 /* ... Aggregates that are not themselves valid for returning in
4901 a register are not allowed. */
4902 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4903 return true;
4905 /* Now check the remaining fields, if any. Only bitfields are allowed,
4906 since they are not addressable. */
4907 for (field = DECL_CHAIN (field);
4908 field;
4909 field = DECL_CHAIN (field))
4911 if (TREE_CODE (field) != FIELD_DECL)
4912 continue;
4914 if (!DECL_BIT_FIELD_TYPE (field))
4915 return true;
4918 return false;
4921 if (TREE_CODE (type) == UNION_TYPE)
4923 tree field;
4925 /* Unions can be returned in registers if every element is
4926 integral, or can be returned in an integer register. */
4927 for (field = TYPE_FIELDS (type);
4928 field;
4929 field = DECL_CHAIN (field))
4931 if (TREE_CODE (field) != FIELD_DECL)
4932 continue;
4934 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4935 return true;
4937 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4938 return true;
4941 return false;
4943 #endif /* not ARM_WINCE */
4945 /* Return all other types in memory. */
4946 return true;
4949 const struct pcs_attribute_arg
4951 const char *arg;
4952 enum arm_pcs value;
4953 } pcs_attribute_args[] =
4955 {"aapcs", ARM_PCS_AAPCS},
4956 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4957 #if 0
4958 /* We could recognize these, but changes would be needed elsewhere
4959 * to implement them. */
4960 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4961 {"atpcs", ARM_PCS_ATPCS},
4962 {"apcs", ARM_PCS_APCS},
4963 #endif
4964 {NULL, ARM_PCS_UNKNOWN}
4967 static enum arm_pcs
4968 arm_pcs_from_attribute (tree attr)
4970 const struct pcs_attribute_arg *ptr;
4971 const char *arg;
4973 /* Get the value of the argument. */
4974 if (TREE_VALUE (attr) == NULL_TREE
4975 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4976 return ARM_PCS_UNKNOWN;
4978 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4980 /* Check it against the list of known arguments. */
4981 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4982 if (streq (arg, ptr->arg))
4983 return ptr->value;
4985 /* An unrecognized PCS name. */
4986 return ARM_PCS_UNKNOWN;
4989 /* Get the PCS variant to use for this call. TYPE is the function's type
4990 specification, DECL is the specific declaration. DECL may be null if
4991 the call could be indirect or if this is a library call. */
4992 static enum arm_pcs
4993 arm_get_pcs_model (const_tree type, const_tree decl)
4995 bool user_convention = false;
4996 enum arm_pcs user_pcs = arm_pcs_default;
4997 tree attr;
4999 gcc_assert (type);
5001 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5002 if (attr)
5004 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5005 user_convention = true;
5008 if (TARGET_AAPCS_BASED)
5010 /* Detect varargs functions. These always use the base rules
5011 (no argument is ever a candidate for a co-processor
5012 register). */
5013 bool base_rules = stdarg_p (type);
5015 if (user_convention)
5017 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5018 sorry ("non-AAPCS derived PCS variant");
5019 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5020 error ("variadic functions must use the base AAPCS variant");
5023 if (base_rules)
5024 return ARM_PCS_AAPCS;
5025 else if (user_convention)
5026 return user_pcs;
5027 else if (decl && flag_unit_at_a_time)
5029 /* Local functions never leak outside this compilation unit,
5030 so we are free to use whatever conventions are
5031 appropriate. */
5032 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5033 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5034 if (i && i->local)
5035 return ARM_PCS_AAPCS_LOCAL;
5038 else if (user_convention && user_pcs != arm_pcs_default)
5039 sorry ("PCS variant");
5041 /* For everything else we use the target's default. */
5042 return arm_pcs_default;
5046 static void
5047 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5048 const_tree fntype ATTRIBUTE_UNUSED,
5049 rtx libcall ATTRIBUTE_UNUSED,
5050 const_tree fndecl ATTRIBUTE_UNUSED)
5052 /* Record the unallocated VFP registers. */
5053 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5054 pcum->aapcs_vfp_reg_alloc = 0;
5057 /* Walk down the type tree of TYPE counting consecutive base elements.
5058 If *MODEP is VOIDmode, then set it to the first valid floating point
5059 type. If a non-floating point type is found, or if a floating point
5060 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5061 otherwise return the count in the sub-tree. */
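/* E.g. for
       struct { float x, y, z; };
   this returns 3 and sets *MODEP to SFmode, making the struct a candidate
   for passing or returning in consecutive VFP registers (s0-s2). */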
5062 static int
5063 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5065 enum machine_mode mode;
5066 HOST_WIDE_INT size;
5068 switch (TREE_CODE (type))
5070 case REAL_TYPE:
5071 mode = TYPE_MODE (type);
5072 if (mode != DFmode && mode != SFmode)
5073 return -1;
5075 if (*modep == VOIDmode)
5076 *modep = mode;
5078 if (*modep == mode)
5079 return 1;
5081 break;
5083 case COMPLEX_TYPE:
5084 mode = TYPE_MODE (TREE_TYPE (type));
5085 if (mode != DFmode && mode != SFmode)
5086 return -1;
5088 if (*modep == VOIDmode)
5089 *modep = mode;
5091 if (*modep == mode)
5092 return 2;
5094 break;
5096 case VECTOR_TYPE:
5097 /* Use V2SImode and V4SImode as representatives of all 64-bit
5098 and 128-bit vector types, whether or not those modes are
5099 supported with the present options. */
5100 size = int_size_in_bytes (type);
5101 switch (size)
5103 case 8:
5104 mode = V2SImode;
5105 break;
5106 case 16:
5107 mode = V4SImode;
5108 break;
5109 default:
5110 return -1;
5113 if (*modep == VOIDmode)
5114 *modep = mode;
5116 /* Vector modes are considered to be opaque: two vectors are
5117 equivalent for the purposes of being homogeneous aggregates
5118 if they are the same size. */
5119 if (*modep == mode)
5120 return 1;
5122 break;
5124 case ARRAY_TYPE:
5126 int count;
5127 tree index = TYPE_DOMAIN (type);
5129 /* Can't handle incomplete types nor sizes that are not
5130 fixed. */
5131 if (!COMPLETE_TYPE_P (type)
5132 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5133 return -1;
5135 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5136 if (count == -1
5137 || !index
5138 || !TYPE_MAX_VALUE (index)
5139 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5140 || !TYPE_MIN_VALUE (index)
5141 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5142 || count < 0)
5143 return -1;
5145 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5146 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5148 /* There must be no padding. */
5149 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5150 return -1;
5152 return count;
5155 case RECORD_TYPE:
5157 int count = 0;
5158 int sub_count;
5159 tree field;
5161 /* Can't handle incomplete types nor sizes that are not
5162 fixed. */
5163 if (!COMPLETE_TYPE_P (type)
5164 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5165 return -1;
5167 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5169 if (TREE_CODE (field) != FIELD_DECL)
5170 continue;
5172 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5173 if (sub_count < 0)
5174 return -1;
5175 count += sub_count;
5178 /* There must be no padding. */
5179 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5180 return -1;
5182 return count;
5185 case UNION_TYPE:
5186 case QUAL_UNION_TYPE:
5188 /* These aren't very interesting except in a degenerate case. */
5189 int count = 0;
5190 int sub_count;
5191 tree field;
5193 /* Can't handle incomplete types nor sizes that are not
5194 fixed. */
5195 if (!COMPLETE_TYPE_P (type)
5196 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5197 return -1;
5199 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5201 if (TREE_CODE (field) != FIELD_DECL)
5202 continue;
5204 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5205 if (sub_count < 0)
5206 return -1;
5207 count = count > sub_count ? count : sub_count;
5210 /* There must be no padding. */
5211 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5212 return -1;
5214 return count;
5217 default:
5218 break;
5221 return -1;
5224 /* Return true if PCS_VARIANT should use VFP registers. */
5225 static bool
5226 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5228 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5230 static bool seen_thumb1_vfp = false;
5232 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5234 sorry ("Thumb-1 hard-float VFP ABI");
5235 /* sorry() is not immediately fatal, so only display this once. */
5236 seen_thumb1_vfp = true;
5239 return true;
5242 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5243 return false;
5245 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5246 (TARGET_VFP_DOUBLE || !is_double));
5249 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5250 suitable for passing or returning in VFP registers for the PCS
5251 variant selected. If it is, then *BASE_MODE is updated to contain
5252 a machine mode describing each element of the argument's type and
5253 *COUNT to hold the number of such elements. */
5254 static bool
5255 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5256 enum machine_mode mode, const_tree type,
5257 enum machine_mode *base_mode, int *count)
5259 enum machine_mode new_mode = VOIDmode;
5261 /* If we have the type information, prefer that to working things
5262 out from the mode. */
5263 if (type)
5265 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5267 if (ag_count > 0 && ag_count <= 4)
5268 *count = ag_count;
5269 else
5270 return false;
5272 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5273 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5274 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5276 *count = 1;
5277 new_mode = mode;
5279 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5281 *count = 2;
5282 new_mode = (mode == DCmode ? DFmode : SFmode);
5284 else
5285 return false;
5288 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5289 return false;
5291 *base_mode = new_mode;
5292 return true;
5295 static bool
5296 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5297 enum machine_mode mode, const_tree type)
5299 int count ATTRIBUTE_UNUSED;
5300 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5302 if (!use_vfp_abi (pcs_variant, false))
5303 return false;
5304 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5305 &ag_mode, &count);
5308 static bool
5309 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5310 const_tree type)
5312 if (!use_vfp_abi (pcum->pcs_variant, false))
5313 return false;
5315 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5316 &pcum->aapcs_vfp_rmode,
5317 &pcum->aapcs_vfp_rcount);
5320 static bool
5321 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5322 const_tree type ATTRIBUTE_UNUSED)
5324 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5325 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5326 int regno;
5328 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5329 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5331 pcum->aapcs_vfp_reg_alloc = mask << regno;
5332 if (mode == BLKmode
5333 || (mode == TImode && ! TARGET_NEON)
5334 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5336 int i;
5337 int rcount = pcum->aapcs_vfp_rcount;
5338 int rshift = shift;
5339 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5340 rtx par;
5341 if (!TARGET_NEON)
5343 /* Avoid using unsupported vector modes. */
5344 if (rmode == V2SImode)
5345 rmode = DImode;
5346 else if (rmode == V4SImode)
5348 rmode = DImode;
5349 rcount *= 2;
5350 rshift /= 2;
5353 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5354 for (i = 0; i < rcount; i++)
5356 rtx tmp = gen_rtx_REG (rmode,
5357 FIRST_VFP_REGNUM + regno + i * rshift);
5358 tmp = gen_rtx_EXPR_LIST
5359 (VOIDmode, tmp,
5360 GEN_INT (i * GET_MODE_SIZE (rmode)));
5361 XVECEXP (par, 0, i) = tmp;
5364 pcum->aapcs_reg = par;
5366 else
5367 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5368 return true;
5370 return false;
5373 static rtx
5374 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5375 enum machine_mode mode,
5376 const_tree type ATTRIBUTE_UNUSED)
5378 if (!use_vfp_abi (pcs_variant, false))
5379 return NULL;
5381 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5383 int count;
5384 enum machine_mode ag_mode;
5385 int i;
5386 rtx par;
5387 int shift;
5389 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5390 &ag_mode, &count);
5392 if (!TARGET_NEON)
5394 if (ag_mode == V2SImode)
5395 ag_mode = DImode;
5396 else if (ag_mode == V4SImode)
5398 ag_mode = DImode;
5399 count *= 2;
5402 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5403 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5404 for (i = 0; i < count; i++)
5406 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5407 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5408 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5409 XVECEXP (par, 0, i) = tmp;
5412 return par;
5415 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5418 static void
5419 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5420 enum machine_mode mode ATTRIBUTE_UNUSED,
5421 const_tree type ATTRIBUTE_UNUSED)
5423 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5424 pcum->aapcs_vfp_reg_alloc = 0;
5425 return;
5428 #define AAPCS_CP(X) \
5430 aapcs_ ## X ## _cum_init, \
5431 aapcs_ ## X ## _is_call_candidate, \
5432 aapcs_ ## X ## _allocate, \
5433 aapcs_ ## X ## _is_return_candidate, \
5434 aapcs_ ## X ## _allocate_return_reg, \
5435 aapcs_ ## X ## _advance \
5438 /* Table of co-processors that can be used to pass arguments in
5439 registers. Ideally no argument should be a candidate for more than
5440 one co-processor table entry, but the table is processed in order
5441 and stops after the first match. If that entry then fails to put
5442 the argument into a co-processor register, the argument will go on
5443 the stack. */
5444 static struct
5446 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5447 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5449 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5450 BLKmode) is a candidate for this co-processor's registers; this
5451 function should ignore any position-dependent state in
5452 CUMULATIVE_ARGS and only use call-type dependent information. */
5453 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5455 /* Return true if the argument does get a co-processor register; it
5456 should set aapcs_reg to an RTX of the register allocated as is
5457 required for a return from FUNCTION_ARG. */
5458 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5460 /* Return true if a result of mode MODE (or type TYPE if MODE is
5461 BLKmode) can be returned in this co-processor's registers. */
5462 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5464 /* Allocate and return an RTX element to hold the return type of a
5465 call; this routine must not fail and will only be called if
5466 is_return_candidate returned true with the same parameters. */
5467 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5469 /* Finish processing this argument and prepare to start processing
5470 the next one. */
5471 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5472 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5474 AAPCS_CP(vfp)
5477 #undef AAPCS_CP
5479 static int
5480 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5481 const_tree type)
5483 int i;
5485 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5486 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5487 return i;
5489 return -1;
5492 static int
5493 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5495 /* We aren't passed a decl, so we can't check that a call is local.
5496 However, it isn't clear that that would be a win anyway, since it
5497 might limit some tail-calling opportunities. */
5498 enum arm_pcs pcs_variant;
5500 if (fntype)
5502 const_tree fndecl = NULL_TREE;
5504 if (TREE_CODE (fntype) == FUNCTION_DECL)
5506 fndecl = fntype;
5507 fntype = TREE_TYPE (fntype);
5510 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5512 else
5513 pcs_variant = arm_pcs_default;
5515 if (pcs_variant != ARM_PCS_AAPCS)
5517 int i;
5519 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5520 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5521 TYPE_MODE (type),
5522 type))
5523 return i;
5525 return -1;
5528 static rtx
5529 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5530 const_tree fntype)
5532 /* We aren't passed a decl, so we can't check that a call is local.
5533 However, it isn't clear that that would be a win anyway, since it
5534 might limit some tail-calling opportunities. */
5535 enum arm_pcs pcs_variant;
5536 int unsignedp ATTRIBUTE_UNUSED;
5538 if (fntype)
5540 const_tree fndecl = NULL_TREE;
5542 if (TREE_CODE (fntype) == FUNCTION_DECL)
5544 fndecl = fntype;
5545 fntype = TREE_TYPE (fntype);
5548 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5550 else
5551 pcs_variant = arm_pcs_default;
5553 /* Promote integer types. */
5554 if (type && INTEGRAL_TYPE_P (type))
5555 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5557 if (pcs_variant != ARM_PCS_AAPCS)
5559 int i;
5561 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5562 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5563 type))
5564 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5565 mode, type);
5568 /* Promotes small structs returned in a register to full-word size
5569 for big-endian AAPCS. */
5570 if (type && arm_return_in_msb (type))
5572 HOST_WIDE_INT size = int_size_in_bytes (type);
5573 if (size % UNITS_PER_WORD != 0)
5575 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5576 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5580 return gen_rtx_REG (mode, R0_REGNUM);
5583 static rtx
5584 aapcs_libcall_value (enum machine_mode mode)
5586 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5587 && GET_MODE_SIZE (mode) <= 4)
5588 mode = SImode;
5590 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5593 /* Lay out a function argument using the AAPCS rules. The rule
5594 numbers referred to here are those in the AAPCS. */
5595 static void
5596 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5597 const_tree type, bool named)
5599 int nregs, nregs2;
5600 int ncrn;
5602 /* We only need to do this once per argument. */
5603 if (pcum->aapcs_arg_processed)
5604 return;
5606 pcum->aapcs_arg_processed = true;
5608 /* Special case: if named is false then we are handling an incoming
5609 anonymous argument which is on the stack. */
5610 if (!named)
5611 return;
5613 /* Is this a potential co-processor register candidate? */
5614 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5616 int slot = aapcs_select_call_coproc (pcum, mode, type);
5617 pcum->aapcs_cprc_slot = slot;
5619 /* We don't have to apply any of the rules from part B of the
5620 preparation phase; these are handled elsewhere in the
5621 compiler. */
5623 if (slot >= 0)
5625 /* A Co-processor register candidate goes either in its own
5626 class of registers or on the stack. */
5627 if (!pcum->aapcs_cprc_failed[slot])
5629 /* C1.cp - Try to allocate the argument to co-processor
5630 registers. */
5631 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5632 return;
5634 /* C2.cp - Put the argument on the stack and note that we
5635 can't assign any more candidates in this slot. We also
5636 need to note that we have allocated stack space, so that
5637 we won't later try to split a non-cprc candidate between
5638 core registers and the stack. */
5639 pcum->aapcs_cprc_failed[slot] = true;
5640 pcum->can_split = false;
5643 /* We didn't get a register, so this argument goes on the
5644 stack. */
5645 gcc_assert (pcum->can_split == false);
5646 return;
5650 /* C3 - For double-word aligned arguments, round the NCRN up to the
5651 next even number. */
5652 ncrn = pcum->aapcs_ncrn;
5653 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5654 ncrn++;
5656 nregs = ARM_NUM_REGS2 (mode, type);
5658 /* Sigh, this test should really assert that nregs > 0, but a GCC
5659 extension allows empty structs and then gives them empty size; it
5660 then allows such a structure to be passed by value. For some of
5661 the code below we have to pretend that such an argument has
5662 non-zero size so that we 'locate' it correctly either in
5663 registers or on the stack. */
5664 gcc_assert (nregs >= 0);
5666 nregs2 = nregs ? nregs : 1;
5668 /* C4 - Argument fits entirely in core registers. */
5669 if (ncrn + nregs2 <= NUM_ARG_REGS)
5671 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5672 pcum->aapcs_next_ncrn = ncrn + nregs;
5673 return;
5676 /* C5 - Some core registers left and there are no arguments already
5677 on the stack: split this argument between the remaining core
5678 registers and the stack. */
5679 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5681 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5682 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5683 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5684 return;
5687 /* C6 - NCRN is set to 4. */
5688 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5690 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5691 return;
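/* For illustration, a target-level fragment that exercises rules C3-C5 above
   (a sketch; the names are hypothetical and a default AAPCS configuration with
   four core argument registers r0-r3 is assumed):

     struct pair { long long x; };            // 8-byte alignment
     extern void callee (int a, struct pair b, int c);

     void caller (void)
     {
       struct pair p = { 42 };
       callee (1, p, 2);   // a -> r0; C3 rounds the NCRN up to r2 (r1 is left
     }                     // unused), so b -> r2-r3 (C4); c then finds no core
                           // register left and goes on the stack (C6-C8).
*/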
5694 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5695 for a call to a function whose data type is FNTYPE.
5696 For a library call, FNTYPE is NULL. */
5697 void
5698 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5699 rtx libname,
5700 tree fndecl ATTRIBUTE_UNUSED)
5702 /* Long call handling. */
5703 if (fntype)
5704 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5705 else
5706 pcum->pcs_variant = arm_pcs_default;
5708 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5710 if (arm_libcall_uses_aapcs_base (libname))
5711 pcum->pcs_variant = ARM_PCS_AAPCS;
5713 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5714 pcum->aapcs_reg = NULL_RTX;
5715 pcum->aapcs_partial = 0;
5716 pcum->aapcs_arg_processed = false;
5717 pcum->aapcs_cprc_slot = -1;
5718 pcum->can_split = true;
5720 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5722 int i;
5724 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5726 pcum->aapcs_cprc_failed[i] = false;
5727 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5730 return;
5733 /* Legacy ABIs */
5735 /* On the ARM, the offset starts at 0. */
5736 pcum->nregs = 0;
5737 pcum->iwmmxt_nregs = 0;
5738 pcum->can_split = true;
5740 /* Varargs vectors are treated the same as long long.
5741 named_count avoids having to change the way arm handles 'named'. */
5742 pcum->named_count = 0;
5743 pcum->nargs = 0;
5745 if (TARGET_REALLY_IWMMXT && fntype)
5747 tree fn_arg;
5749 for (fn_arg = TYPE_ARG_TYPES (fntype);
5750 fn_arg;
5751 fn_arg = TREE_CHAIN (fn_arg))
5752 pcum->named_count += 1;
5754 if (! pcum->named_count)
5755 pcum->named_count = INT_MAX;
5759 /* Return true if we use LRA instead of reload pass. */
5760 static bool
5761 arm_lra_p (void)
5763 return arm_lra_flag;
5766 /* Return true if mode/type need doubleword alignment. */
5767 static bool
5768 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5770 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5771 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
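/* For example (a sketch, assuming an AAPCS configuration where PARM_BOUNDARY
   is 32 and long long is 8-byte aligned):

     struct s4 { int a; };                               // 32-bit alignment: no
     struct s8 { long long a; };                         // 64-bit alignment: yes
     typedef int v2si __attribute__ ((vector_size (8))); // 64-bit alignment: yes

   Arguments of type s8 or v2si are placed on doubleword boundaries, both in
   core registers (even/odd pairs) and on the stack.  */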
5775 /* Determine where to put an argument to a function.
5776 Value is zero to push the argument on the stack,
5777 or a hard register in which to store the argument.
5779 MODE is the argument's machine mode.
5780 TYPE is the data type of the argument (as a tree).
5781 This is null for libcalls where that information may
5782 not be available.
5783 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5784 the preceding args and about the function being called.
5785 NAMED is nonzero if this argument is a named parameter
5786 (otherwise it is an extra parameter matching an ellipsis).
5788 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5789 other arguments are passed on the stack. If (NAMED == 0) (which happens
5790 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5791 defined), say it is passed on the stack (function_prologue will
5792 indeed make it be passed on the stack if necessary). */
5794 static rtx
5795 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5796 const_tree type, bool named)
5798 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5799 int nregs;
5801 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5802 a call insn (op3 of a call_value insn). */
5803 if (mode == VOIDmode)
5804 return const0_rtx;
5806 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5808 aapcs_layout_arg (pcum, mode, type, named);
5809 return pcum->aapcs_reg;
5812 /* Varargs vectors are treated the same as long long.
5813 named_count avoids having to change the way arm handles 'named'. */
5814 if (TARGET_IWMMXT_ABI
5815 && arm_vector_mode_supported_p (mode)
5816 && pcum->named_count > pcum->nargs + 1)
5818 if (pcum->iwmmxt_nregs <= 9)
5819 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5820 else
5822 pcum->can_split = false;
5823 return NULL_RTX;
5827 /* Put doubleword aligned quantities in even register pairs. */
5828 if (pcum->nregs & 1
5829 && ARM_DOUBLEWORD_ALIGN
5830 && arm_needs_doubleword_align (mode, type))
5831 pcum->nregs++;
5833 /* Only allow splitting an arg between regs and memory if all preceding
5834 args were allocated to regs. For args passed by reference we only count
5835 the reference pointer. */
5836 if (pcum->can_split)
5837 nregs = 1;
5838 else
5839 nregs = ARM_NUM_REGS2 (mode, type);
5841 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5842 return NULL_RTX;
5844 return gen_rtx_REG (mode, pcum->nregs);
5847 static unsigned int
5848 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5850 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5851 ? DOUBLEWORD_ALIGNMENT
5852 : PARM_BOUNDARY);
5855 static int
5856 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5857 tree type, bool named)
5859 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5860 int nregs = pcum->nregs;
5862 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5864 aapcs_layout_arg (pcum, mode, type, named);
5865 return pcum->aapcs_partial;
5868 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5869 return 0;
5871 if (NUM_ARG_REGS > nregs
5872 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5873 && pcum->can_split)
5874 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5876 return 0;
5879 /* Update the data in PCUM to advance over an argument
5880 of mode MODE and data type TYPE.
5881 (TYPE is null for libcalls where that information may not be available.) */
5883 static void
5884 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5885 const_tree type, bool named)
5887 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5889 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5891 aapcs_layout_arg (pcum, mode, type, named);
5893 if (pcum->aapcs_cprc_slot >= 0)
5895 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5896 type);
5897 pcum->aapcs_cprc_slot = -1;
5900 /* Generic stuff. */
5901 pcum->aapcs_arg_processed = false;
5902 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5903 pcum->aapcs_reg = NULL_RTX;
5904 pcum->aapcs_partial = 0;
5906 else
5908 pcum->nargs += 1;
5909 if (arm_vector_mode_supported_p (mode)
5910 && pcum->named_count > pcum->nargs
5911 && TARGET_IWMMXT_ABI)
5912 pcum->iwmmxt_nregs += 1;
5913 else
5914 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5918 /* Variable sized types are passed by reference. This is a GCC
5919 extension to the ARM ABI. */
5921 static bool
5922 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5923 enum machine_mode mode ATTRIBUTE_UNUSED,
5924 const_tree type, bool named ATTRIBUTE_UNUSED)
5926 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5929 /* Encode the current state of the #pragma [no_]long_calls. */
5930 typedef enum
5932 OFF, /* No #pragma [no_]long_calls is in effect. */
5933 LONG, /* #pragma long_calls is in effect. */
5934 SHORT /* #pragma no_long_calls is in effect. */
5935 } arm_pragma_enum;
5937 static arm_pragma_enum arm_pragma_long_calls = OFF;
5939 void
5940 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5942 arm_pragma_long_calls = LONG;
5945 void
5946 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5948 arm_pragma_long_calls = SHORT;
5951 void
5952 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5954 arm_pragma_long_calls = OFF;
5957 /* Handle an attribute requiring a FUNCTION_DECL;
5958 arguments as in struct attribute_spec.handler. */
5959 static tree
5960 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5961 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5963 if (TREE_CODE (*node) != FUNCTION_DECL)
5965 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5966 name);
5967 *no_add_attrs = true;
5970 return NULL_TREE;
5973 /* Handle an "interrupt" or "isr" attribute;
5974 arguments as in struct attribute_spec.handler. */
5975 static tree
5976 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5977 bool *no_add_attrs)
5979 if (DECL_P (*node))
5981 if (TREE_CODE (*node) != FUNCTION_DECL)
5983 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5984 name);
5985 *no_add_attrs = true;
5987 /* FIXME: the argument, if any, is checked for type attributes;
5988 should it be checked for decl ones? */
5990 else
5992 if (TREE_CODE (*node) == FUNCTION_TYPE
5993 || TREE_CODE (*node) == METHOD_TYPE)
5995 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5997 warning (OPT_Wattributes, "%qE attribute ignored",
5998 name);
5999 *no_add_attrs = true;
6002 else if (TREE_CODE (*node) == POINTER_TYPE
6003 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6004 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6005 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6007 *node = build_variant_type_copy (*node);
6008 TREE_TYPE (*node) = build_type_attribute_variant
6009 (TREE_TYPE (*node),
6010 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6011 *no_add_attrs = true;
6013 else
6015 /* Possibly pass this attribute on from the type to a decl. */
6016 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6017 | (int) ATTR_FLAG_FUNCTION_NEXT
6018 | (int) ATTR_FLAG_ARRAY_NEXT))
6020 *no_add_attrs = true;
6021 return tree_cons (name, args, NULL_TREE);
6023 else
6025 warning (OPT_Wattributes, "%qE attribute ignored",
6026 name);
6031 return NULL_TREE;
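/* Typical source-level uses that this handler accepts (a sketch; the argument
   strings are the ones understood by arm_isr_value):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));
*/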
6034 /* Handle a "pcs" attribute; arguments as in struct
6035 attribute_spec.handler. */
6036 static tree
6037 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6038 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6040 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6042 warning (OPT_Wattributes, "%qE attribute ignored", name);
6043 *no_add_attrs = true;
6045 return NULL_TREE;
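/* Source-level usage accepted here (a sketch); arm_pcs_from_attribute
   recognizes only the PCS variant names "aapcs" and "aapcs-vfp", so anything
   else is warned about and dropped:

     double dot (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));
*/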
6048 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6049 /* Handle the "notshared" attribute. This attribute is another way of
6050 requesting hidden visibility. ARM's compiler supports
6051 "__declspec(notshared)"; we support the same thing via an
6052 attribute. */
6054 static tree
6055 arm_handle_notshared_attribute (tree *node,
6056 tree name ATTRIBUTE_UNUSED,
6057 tree args ATTRIBUTE_UNUSED,
6058 int flags ATTRIBUTE_UNUSED,
6059 bool *no_add_attrs)
6061 tree decl = TYPE_NAME (*node);
6063 if (decl)
6065 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6066 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6067 *no_add_attrs = false;
6069 return NULL_TREE;
6071 #endif
6073 /* Return 0 if the attributes for two types are incompatible, 1 if they
6074 are compatible, and 2 if they are nearly compatible (which causes a
6075 warning to be generated). */
6076 static int
6077 arm_comp_type_attributes (const_tree type1, const_tree type2)
6079 int l1, l2, s1, s2;
6081 /* Check for mismatch of non-default calling convention. */
6082 if (TREE_CODE (type1) != FUNCTION_TYPE)
6083 return 1;
6085 /* Check for mismatched call attributes. */
6086 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6087 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6088 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6089 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6091 /* Only bother to check if an attribute is defined. */
6092 if (l1 | l2 | s1 | s2)
6094 /* If one type has an attribute, the other must have the same attribute. */
6095 if ((l1 != l2) || (s1 != s2))
6096 return 0;
6098 /* Disallow mixed attributes. */
6099 if ((l1 & s2) || (l2 & s1))
6100 return 0;
6103 /* Check for mismatched ISR attribute. */
6104 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6105 if (! l1)
6106 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6107 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6108 if (! l2)
6109 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6110 if (l1 != l2)
6111 return 0;
6113 return 1;
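/* As an illustration, two function types that this routine treats as having
   incompatible call attributes (a sketch; whether and how the front end
   diagnoses the mix depends on where the two types meet):

     typedef void far_fn (void) __attribute__ ((long_call));
     typedef void near_fn (void) __attribute__ ((short_call));

   A far_fn and a near_fn mix long_call with short_call, so the comparison
   above returns 0; two otherwise identical types that both carry long_call
   would compare as 1.  */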
6116 /* Assigns default attributes to newly defined type. This is used to
6117 set short_call/long_call attributes for function types of
6118 functions defined inside corresponding #pragma scopes. */
6119 static void
6120 arm_set_default_type_attributes (tree type)
6122 /* Add __attribute__ ((long_call)) to all functions when inside
6123 #pragma long_calls, or __attribute__ ((short_call)) when inside
6124 #pragma no_long_calls. */
6125 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6127 tree type_attr_list, attr_name;
6128 type_attr_list = TYPE_ATTRIBUTES (type);
6130 if (arm_pragma_long_calls == LONG)
6131 attr_name = get_identifier ("long_call");
6132 else if (arm_pragma_long_calls == SHORT)
6133 attr_name = get_identifier ("short_call");
6134 else
6135 return;
6137 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6138 TYPE_ATTRIBUTES (type) = type_attr_list;
6142 /* Return true if DECL is known to be linked into section SECTION. */
6144 static bool
6145 arm_function_in_section_p (tree decl, section *section)
6147 /* We can only be certain about functions defined in the same
6148 compilation unit. */
6149 if (!TREE_STATIC (decl))
6150 return false;
6152 /* Make sure that SYMBOL always binds to the definition in this
6153 compilation unit. */
6154 if (!targetm.binds_local_p (decl))
6155 return false;
6157 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6158 if (!DECL_SECTION_NAME (decl))
6160 /* Make sure that we will not create a unique section for DECL. */
6161 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6162 return false;
6165 return function_section (decl) == section;
6168 /* Return nonzero if a 32-bit "long_call" should be generated for
6169 a call from the current function to DECL. We generate a long_call
6170 if the function:
6172 a. has an __attribute__ ((long_call))
6173 or b. is within the scope of a #pragma long_calls
6174 or c. the -mlong-calls command line switch has been specified
6176 However we do not generate a long call if the function:
6178 d. has an __attribute__ ((short_call))
6179 or e. is inside the scope of a #pragma no_long_calls
6180 or f. is defined in the same section as the current function. */
6182 bool
6183 arm_is_long_call_p (tree decl)
6185 tree attrs;
6187 if (!decl)
6188 return TARGET_LONG_CALLS;
6190 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6191 if (lookup_attribute ("short_call", attrs))
6192 return false;
6194 /* For "f", be conservative, and only cater for cases in which the
6195 whole of the current function is placed in the same section. */
6196 if (!flag_reorder_blocks_and_partition
6197 && TREE_CODE (decl) == FUNCTION_DECL
6198 && arm_function_in_section_p (decl, current_function_section ()))
6199 return false;
6201 if (lookup_attribute ("long_call", attrs))
6202 return true;
6204 return TARGET_LONG_CALLS;
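/* The source-level controls behind cases a-e above look like this
   (a sketch; the function names are hypothetical):

     void far_func (void) __attribute__ ((long_call));    // case a
     void near_func (void) __attribute__ ((short_call));  // case d

     #pragma long_calls        // case b: following declarations default to long_call
     void also_far (void);
     #pragma no_long_calls     // case e: following declarations default to short_call
     void also_near (void);
     #pragma long_calls_off    // back to the command-line default (-mlong-calls or not)
*/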
6207 /* Return nonzero if it is ok to make a tail-call to DECL. */
6208 static bool
6209 arm_function_ok_for_sibcall (tree decl, tree exp)
6211 unsigned long func_type;
6213 if (cfun->machine->sibcall_blocked)
6214 return false;
6216 /* Never tailcall something if we are generating code for Thumb-1. */
6217 if (TARGET_THUMB1)
6218 return false;
6220 /* The PIC register is live on entry to VxWorks PLT entries, so we
6221 must make the call before restoring the PIC register. */
6222 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6223 return false;
6225 /* If we are interworking and the function is not declared static
6226 then we can't tail-call it unless we know that it exists in this
6227 compilation unit (since it might be a Thumb routine). */
6228 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6229 && !TREE_ASM_WRITTEN (decl))
6230 return false;
6232 func_type = arm_current_func_type ();
6233 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6234 if (IS_INTERRUPT (func_type))
6235 return false;
6237 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6239 /* Check that the return value locations are the same. For
6240 example that we aren't returning a value from the sibling in
6241 a VFP register but then need to transfer it to a core
6242 register. */
6243 rtx a, b;
6245 a = arm_function_value (TREE_TYPE (exp), decl, false);
6246 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6247 cfun->decl, false);
6248 if (!rtx_equal_p (a, b))
6249 return false;
6252 /* Never tailcall if function may be called with a misaligned SP. */
6253 if (IS_STACKALIGN (func_type))
6254 return false;
6256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6257 references should become a NOP. Don't convert such calls into
6258 sibling calls. */
6259 if (TARGET_AAPCS_BASED
6260 && arm_abi == ARM_ABI_AAPCS
6261 && decl
6262 && DECL_WEAK (decl))
6263 return false;
6265 /* Everything else is ok. */
6266 return true;
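/* For example, the weak-reference restriction above keeps the following call
   out of the sibcall path on bare-metal AAPCS targets (a sketch; with
   optimization the call would otherwise be a tail-call candidate):

     extern void maybe_absent (void) __attribute__ ((weak));

     void wrapper (void)
     {
       maybe_absent ();   // kept as a normal call: if unresolved it must become a NOP
     }
*/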
6270 /* Addressing mode support functions. */
6272 /* Return nonzero if X is a legitimate immediate operand when compiling
6273 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6274 int
6275 legitimate_pic_operand_p (rtx x)
6277 if (GET_CODE (x) == SYMBOL_REF
6278 || (GET_CODE (x) == CONST
6279 && GET_CODE (XEXP (x, 0)) == PLUS
6280 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6281 return 0;
6283 return 1;
6286 /* Record that the current function needs a PIC register. Initialize
6287 cfun->machine->pic_reg if we have not already done so. */
6289 static void
6290 require_pic_register (void)
6292 /* A lot of the logic here is made obscure by the fact that this
6293 routine gets called as part of the rtx cost estimation process.
6294 We don't want those calls to affect any assumptions about the real
6295 function; and further, we can't call entry_of_function() until we
6296 start the real expansion process. */
6297 if (!crtl->uses_pic_offset_table)
6299 gcc_assert (can_create_pseudo_p ());
6300 if (arm_pic_register != INVALID_REGNUM
6301 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6303 if (!cfun->machine->pic_reg)
6304 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6306 /* Play games to avoid marking the function as needing pic
6307 if we are being called as part of the cost-estimation
6308 process. */
6309 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6310 crtl->uses_pic_offset_table = 1;
6312 else
6314 rtx seq, insn;
6316 if (!cfun->machine->pic_reg)
6317 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6319 /* Play games to avoid marking the function as needing pic
6320 if we are being called as part of the cost-estimation
6321 process. */
6322 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6324 crtl->uses_pic_offset_table = 1;
6325 start_sequence ();
6327 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6328 && arm_pic_register > LAST_LO_REGNUM)
6329 emit_move_insn (cfun->machine->pic_reg,
6330 gen_rtx_REG (Pmode, arm_pic_register));
6331 else
6332 arm_load_pic_register (0UL);
6334 seq = get_insns ();
6335 end_sequence ();
6337 for (insn = seq; insn; insn = NEXT_INSN (insn))
6338 if (INSN_P (insn))
6339 INSN_LOCATION (insn) = prologue_location;
6341 /* We can be called during expansion of PHI nodes, where
6342 we can't yet emit instructions directly in the final
6343 insn stream. Queue the insns on the entry edge, they will
6344 be committed after everything else is expanded. */
6345 insert_insn_on_edge (seq,
6346 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6352 rtx
6353 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6355 if (GET_CODE (orig) == SYMBOL_REF
6356 || GET_CODE (orig) == LABEL_REF)
6358 rtx insn;
6360 if (reg == 0)
6362 gcc_assert (can_create_pseudo_p ());
6363 reg = gen_reg_rtx (Pmode);
6366 /* VxWorks does not impose a fixed gap between segments; the run-time
6367 gap can be different from the object-file gap. We therefore can't
6368 use GOTOFF unless we are absolutely sure that the symbol is in the
6369 same segment as the GOT. Unfortunately, the flexibility of linker
6370 scripts means that we can't be sure of that in general, so assume
6371 that GOTOFF is never valid on VxWorks. */
6372 if ((GET_CODE (orig) == LABEL_REF
6373 || (GET_CODE (orig) == SYMBOL_REF &&
6374 SYMBOL_REF_LOCAL_P (orig)))
6375 && NEED_GOT_RELOC
6376 && arm_pic_data_is_text_relative)
6377 insn = arm_pic_static_addr (orig, reg);
6378 else
6380 rtx pat;
6381 rtx mem;
6383 /* If this function doesn't have a pic register, create one now. */
6384 require_pic_register ();
6386 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6388 /* Make the MEM as close to a constant as possible. */
6389 mem = SET_SRC (pat);
6390 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6391 MEM_READONLY_P (mem) = 1;
6392 MEM_NOTRAP_P (mem) = 1;
6394 insn = emit_insn (pat);
6397 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6398 by loop. */
6399 set_unique_reg_note (insn, REG_EQUAL, orig);
6401 return reg;
6403 else if (GET_CODE (orig) == CONST)
6405 rtx base, offset;
6407 if (GET_CODE (XEXP (orig, 0)) == PLUS
6408 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6409 return orig;
6411 /* Handle the case where we have: const (UNSPEC_TLS). */
6412 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6413 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6414 return orig;
6416 /* Handle the case where we have:
6417 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6418 CONST_INT. */
6419 if (GET_CODE (XEXP (orig, 0)) == PLUS
6420 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6421 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6423 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6424 return orig;
6427 if (reg == 0)
6429 gcc_assert (can_create_pseudo_p ());
6430 reg = gen_reg_rtx (Pmode);
6433 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6435 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6436 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6437 base == reg ? 0 : reg);
6439 if (CONST_INT_P (offset))
6441 /* The base register doesn't really matter, we only want to
6442 test the index for the appropriate mode. */
6443 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6445 gcc_assert (can_create_pseudo_p ());
6446 offset = force_reg (Pmode, offset);
6449 if (CONST_INT_P (offset))
6450 return plus_constant (Pmode, base, INTVAL (offset));
6453 if (GET_MODE_SIZE (mode) > 4
6454 && (GET_MODE_CLASS (mode) == MODE_INT
6455 || TARGET_SOFT_FLOAT))
6457 emit_insn (gen_addsi3 (reg, base, offset));
6458 return reg;
6461 return gen_rtx_PLUS (Pmode, base, offset);
6464 return orig;
6468 /* Find a spare register to use during the prolog of a function. */
6470 static int
6471 thumb_find_work_register (unsigned long pushed_regs_mask)
6473 int reg;
6475 /* Check the argument registers first as these are call-used. The
6476 register allocation order means that sometimes r3 might be used
6477 but earlier argument registers might not, so check them all. */
6478 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6479 if (!df_regs_ever_live_p (reg))
6480 return reg;
6482 /* Before going on to check the call-saved registers we can try a couple
6483 more ways of deducing that r3 is available. The first is when we are
6484 pushing anonymous arguments onto the stack and we have fewer than 4
6485 registers' worth of fixed arguments (*). In this case r3 will be part of
6486 the variable argument list and so we can be sure that it will be
6487 pushed right at the start of the function. Hence it will be available
6488 for the rest of the prologue.
6489 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6490 if (cfun->machine->uses_anonymous_args
6491 && crtl->args.pretend_args_size > 0)
6492 return LAST_ARG_REGNUM;
6494 /* The other case is when we have fixed arguments but fewer than 4 registers'
6495 worth. In this case r3 might be used in the body of the function, but
6496 it is not being used to convey an argument into the function. In theory
6497 we could just check crtl->args.size to see how many bytes are
6498 being passed in argument registers, but it seems that it is unreliable.
6499 Sometimes it will have the value 0 when in fact arguments are being
6500 passed. (See testcase execute/20021111-1.c for an example). So we also
6501 check the args_info.nregs field as well. The problem with this field is
6502 that it makes no allowances for arguments that are passed to the
6503 function but which are not used. Hence we could miss an opportunity
6504 when a function has an unused argument in r3. But it is better to be
6505 safe than to be sorry. */
6506 if (! cfun->machine->uses_anonymous_args
6507 && crtl->args.size >= 0
6508 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6509 && (TARGET_AAPCS_BASED
6510 ? crtl->args.info.aapcs_ncrn < 4
6511 : crtl->args.info.nregs < 4))
6512 return LAST_ARG_REGNUM;
6514 /* Otherwise look for a call-saved register that is going to be pushed. */
6515 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6516 if (pushed_regs_mask & (1 << reg))
6517 return reg;
6519 if (TARGET_THUMB2)
6521 /* Thumb-2 can use high regs. */
6522 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6523 if (pushed_regs_mask & (1 << reg))
6524 return reg;
6526 /* Something went wrong - thumb_compute_save_reg_mask()
6527 should have arranged for a suitable register to be pushed. */
6528 gcc_unreachable ();
6531 static GTY(()) int pic_labelno;
6533 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6534 low register. */
6536 void
6537 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6539 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6541 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6542 return;
6544 gcc_assert (flag_pic);
6546 pic_reg = cfun->machine->pic_reg;
6547 if (TARGET_VXWORKS_RTP)
6549 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6550 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6551 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6553 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6555 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6556 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6558 else
6560 /* We use an UNSPEC rather than a LABEL_REF because this label
6561 never appears in the code stream. */
6563 labelno = GEN_INT (pic_labelno++);
6564 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6565 l1 = gen_rtx_CONST (VOIDmode, l1);
6567 /* On the ARM the PC register contains 'dot + 8' at the time of the
6568 addition, on the Thumb it is 'dot + 4'. */
6569 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6570 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6571 UNSPEC_GOTSYM_OFF);
6572 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6574 if (TARGET_32BIT)
6576 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6578 else /* TARGET_THUMB1 */
6580 if (arm_pic_register != INVALID_REGNUM
6581 && REGNO (pic_reg) > LAST_LO_REGNUM)
6583 /* We will have pushed the pic register, so we should always be
6584 able to find a work register. */
6585 pic_tmp = gen_rtx_REG (SImode,
6586 thumb_find_work_register (saved_regs));
6587 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6588 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6589 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6591 else if (arm_pic_register != INVALID_REGNUM
6592 && arm_pic_register > LAST_LO_REGNUM
6593 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6595 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6596 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6597 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6599 else
6600 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6604 /* Need to emit this whether or not we obey regdecls,
6605 since setjmp/longjmp can cause life info to screw up. */
6606 emit_use (pic_reg);
6609 /* Generate code to load the address of a static var when flag_pic is set. */
6610 static rtx
6611 arm_pic_static_addr (rtx orig, rtx reg)
6613 rtx l1, labelno, offset_rtx, insn;
6615 gcc_assert (flag_pic);
6617 /* We use an UNSPEC rather than a LABEL_REF because this label
6618 never appears in the code stream. */
6619 labelno = GEN_INT (pic_labelno++);
6620 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6621 l1 = gen_rtx_CONST (VOIDmode, l1);
6623 /* On the ARM the PC register contains 'dot + 8' at the time of the
6624 addition, on the Thumb it is 'dot + 4'. */
6625 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6626 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6627 UNSPEC_SYMBOL_OFFSET);
6628 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6630 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6631 return insn;
6634 /* Return nonzero if X is valid as an ARM state addressing register. */
6635 static int
6636 arm_address_register_rtx_p (rtx x, int strict_p)
6638 int regno;
6640 if (!REG_P (x))
6641 return 0;
6643 regno = REGNO (x);
6645 if (strict_p)
6646 return ARM_REGNO_OK_FOR_BASE_P (regno);
6648 return (regno <= LAST_ARM_REGNUM
6649 || regno >= FIRST_PSEUDO_REGISTER
6650 || regno == FRAME_POINTER_REGNUM
6651 || regno == ARG_POINTER_REGNUM);
6654 /* Return TRUE if this rtx is the difference of a symbol and a label,
6655 and will reduce to a PC-relative relocation in the object file.
6656 Expressions like this can be left alone when generating PIC, rather
6657 than forced through the GOT. */
6658 static int
6659 pcrel_constant_p (rtx x)
6661 if (GET_CODE (x) == MINUS)
6662 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6664 return FALSE;
6667 /* Return true if X will surely end up in an index register after next
6668 splitting pass. */
6669 static bool
6670 will_be_in_index_register (const_rtx x)
6672 /* arm.md: calculate_pic_address will split this into a register. */
6673 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6676 /* Return nonzero if X is a valid ARM state address operand. */
6677 int
6678 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6679 int strict_p)
6681 bool use_ldrd;
6682 enum rtx_code code = GET_CODE (x);
6684 if (arm_address_register_rtx_p (x, strict_p))
6685 return 1;
6687 use_ldrd = (TARGET_LDRD
6688 && (mode == DImode
6689 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6691 if (code == POST_INC || code == PRE_DEC
6692 || ((code == PRE_INC || code == POST_DEC)
6693 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6694 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6696 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6697 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6698 && GET_CODE (XEXP (x, 1)) == PLUS
6699 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6701 rtx addend = XEXP (XEXP (x, 1), 1);
6703 /* Don't allow ldrd post-increment by register because it's hard
6704 to fix up invalid register choices. */
6705 if (use_ldrd
6706 && GET_CODE (x) == POST_MODIFY
6707 && REG_P (addend))
6708 return 0;
6710 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6711 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6714 /* After reload constants split into minipools will have addresses
6715 from a LABEL_REF. */
6716 else if (reload_completed
6717 && (code == LABEL_REF
6718 || (code == CONST
6719 && GET_CODE (XEXP (x, 0)) == PLUS
6720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6721 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6722 return 1;
6724 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6725 return 0;
6727 else if (code == PLUS)
6729 rtx xop0 = XEXP (x, 0);
6730 rtx xop1 = XEXP (x, 1);
6732 return ((arm_address_register_rtx_p (xop0, strict_p)
6733 && ((CONST_INT_P (xop1)
6734 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6735 || (!strict_p && will_be_in_index_register (xop1))))
6736 || (arm_address_register_rtx_p (xop1, strict_p)
6737 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6740 #if 0
6741 /* Reload currently can't handle MINUS, so disable this for now */
6742 else if (GET_CODE (x) == MINUS)
6744 rtx xop0 = XEXP (x, 0);
6745 rtx xop1 = XEXP (x, 1);
6747 return (arm_address_register_rtx_p (xop0, strict_p)
6748 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6750 #endif
6752 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6753 && code == SYMBOL_REF
6754 && CONSTANT_POOL_ADDRESS_P (x)
6755 && ! (flag_pic
6756 && symbol_mentioned_p (get_pool_constant (x))
6757 && ! pcrel_constant_p (get_pool_constant (x))))
6758 return 1;
6760 return 0;
6763 /* Return nonzero if X is a valid Thumb-2 address operand. */
6764 static int
6765 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6767 bool use_ldrd;
6768 enum rtx_code code = GET_CODE (x);
6770 if (arm_address_register_rtx_p (x, strict_p))
6771 return 1;
6773 use_ldrd = (TARGET_LDRD
6774 && (mode == DImode
6775 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6777 if (code == POST_INC || code == PRE_DEC
6778 || ((code == PRE_INC || code == POST_DEC)
6779 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6780 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6782 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6783 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6784 && GET_CODE (XEXP (x, 1)) == PLUS
6785 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6787 /* Thumb-2 only has autoincrement by constant. */
6788 rtx addend = XEXP (XEXP (x, 1), 1);
6789 HOST_WIDE_INT offset;
6791 if (!CONST_INT_P (addend))
6792 return 0;
6794 offset = INTVAL (addend);
6795 if (GET_MODE_SIZE (mode) <= 4)
6796 return (offset > -256 && offset < 256);
6798 return (use_ldrd && offset > -1024 && offset < 1024
6799 && (offset & 3) == 0);
6802 /* After reload constants split into minipools will have addresses
6803 from a LABEL_REF. */
6804 else if (reload_completed
6805 && (code == LABEL_REF
6806 || (code == CONST
6807 && GET_CODE (XEXP (x, 0)) == PLUS
6808 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6809 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6810 return 1;
6812 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6813 return 0;
6815 else if (code == PLUS)
6817 rtx xop0 = XEXP (x, 0);
6818 rtx xop1 = XEXP (x, 1);
6820 return ((arm_address_register_rtx_p (xop0, strict_p)
6821 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6822 || (!strict_p && will_be_in_index_register (xop1))))
6823 || (arm_address_register_rtx_p (xop1, strict_p)
6824 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6827 /* Normally we can assign constant values to target registers without
6828 the help of the constant pool. But there are cases where we have to use
6829 the constant pool, for example:
6830 1) assigning a label to a register.
6831 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6833 A constant pool access of the form:
6834 (set (reg r0) (mem (symbol_ref (".LC0"))))
6835 will cause the use of the literal pool (later, in function arm_reorg).
6836 So here we mark such a format as invalid; the compiler will then
6837 adjust it into:
6838 (set (reg r0) (symbol_ref (".LC0")))
6839 (set (reg r0) (mem (reg r0))).
6840 No extra register is required, and (mem (reg r0)) won't cause the use
6841 of literal pools. */
6842 else if (arm_disable_literal_pool && code == SYMBOL_REF
6843 && CONSTANT_POOL_ADDRESS_P (x))
6844 return 0;
6846 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6847 && code == SYMBOL_REF
6848 && CONSTANT_POOL_ADDRESS_P (x)
6849 && ! (flag_pic
6850 && symbol_mentioned_p (get_pool_constant (x))
6851 && ! pcrel_constant_p (get_pool_constant (x))))
6852 return 1;
6854 return 0;
6857 /* Return nonzero if INDEX is valid for an address index operand in
6858 ARM state. */
6859 static int
6860 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6861 int strict_p)
6863 HOST_WIDE_INT range;
6864 enum rtx_code code = GET_CODE (index);
6866 /* Standard coprocessor addressing modes. */
6867 if (TARGET_HARD_FLOAT
6868 && TARGET_VFP
6869 && (mode == SFmode || mode == DFmode))
6870 return (code == CONST_INT && INTVAL (index) < 1024
6871 && INTVAL (index) > -1024
6872 && (INTVAL (index) & 3) == 0);
6874 /* For quad modes, we restrict the constant offset to be slightly less
6875 than what the instruction format permits. We do this because for
6876 quad mode moves, we will actually decompose them into two separate
6877 double-mode reads or writes. INDEX must therefore be a valid
6878 (double-mode) offset and so should INDEX+8. */
6879 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6880 return (code == CONST_INT
6881 && INTVAL (index) < 1016
6882 && INTVAL (index) > -1024
6883 && (INTVAL (index) & 3) == 0);
6885 /* We have no such constraint on double mode offsets, so we permit the
6886 full range of the instruction format. */
6887 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6888 return (code == CONST_INT
6889 && INTVAL (index) < 1024
6890 && INTVAL (index) > -1024
6891 && (INTVAL (index) & 3) == 0);
6893 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6894 return (code == CONST_INT
6895 && INTVAL (index) < 1024
6896 && INTVAL (index) > -1024
6897 && (INTVAL (index) & 3) == 0);
6899 if (arm_address_register_rtx_p (index, strict_p)
6900 && (GET_MODE_SIZE (mode) <= 4))
6901 return 1;
6903 if (mode == DImode || mode == DFmode)
6905 if (code == CONST_INT)
6907 HOST_WIDE_INT val = INTVAL (index);
6909 if (TARGET_LDRD)
6910 return val > -256 && val < 256;
6911 else
6912 return val > -4096 && val < 4092;
6915 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6918 if (GET_MODE_SIZE (mode) <= 4
6919 && ! (arm_arch4
6920 && (mode == HImode
6921 || mode == HFmode
6922 || (mode == QImode && outer == SIGN_EXTEND))))
6924 if (code == MULT)
6926 rtx xiop0 = XEXP (index, 0);
6927 rtx xiop1 = XEXP (index, 1);
6929 return ((arm_address_register_rtx_p (xiop0, strict_p)
6930 && power_of_two_operand (xiop1, SImode))
6931 || (arm_address_register_rtx_p (xiop1, strict_p)
6932 && power_of_two_operand (xiop0, SImode)));
6934 else if (code == LSHIFTRT || code == ASHIFTRT
6935 || code == ASHIFT || code == ROTATERT)
6937 rtx op = XEXP (index, 1);
6939 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6940 && CONST_INT_P (op)
6941 && INTVAL (op) > 0
6942 && INTVAL (op) <= 31);
6946 /* For ARM v4 we may be doing a sign-extend operation during the
6947 load. */
6948 if (arm_arch4)
6950 if (mode == HImode
6951 || mode == HFmode
6952 || (outer == SIGN_EXTEND && mode == QImode))
6953 range = 256;
6954 else
6955 range = 4096;
6957 else
6958 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6960 return (code == CONST_INT
6961 && INTVAL (index) < range
6962 && INTVAL (index) > -range);
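/* Target-level C whose address arithmetic typically maps onto the index forms
   accepted above when compiling for ARM state (a sketch):

     int load_scaled (int *base, int i) { return base[i]; }   // reg + (reg << 2)
     int load_offset (int *base)        { return base[255]; } // reg + #1020
*/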
6965 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6966 index operand, i.e. 1, 2, 4 or 8. */
6967 static bool
6968 thumb2_index_mul_operand (rtx op)
6970 HOST_WIDE_INT val;
6972 if (!CONST_INT_P (op))
6973 return false;
6975 val = INTVAL (op);
6976 return (val == 1 || val == 2 || val == 4 || val == 8);
6979 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6980 static int
6981 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6983 enum rtx_code code = GET_CODE (index);
6985 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6986 /* Standard coprocessor addressing modes. */
6987 if (TARGET_HARD_FLOAT
6988 && TARGET_VFP
6989 && (mode == SFmode || mode == DFmode))
6990 return (code == CONST_INT && INTVAL (index) < 1024
6991 /* Thumb-2 allows only > -256 index range for its core register
6992 load/stores. Since we allow SF/DF in core registers, we have
6993 to use the intersection between -256~4096 (core) and -1024~1024
6994 (coprocessor). */
6995 && INTVAL (index) > -256
6996 && (INTVAL (index) & 3) == 0);
6998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7000 /* For DImode assume values will usually live in core regs
7001 and only allow LDRD addressing modes. */
7002 if (!TARGET_LDRD || mode != DImode)
7003 return (code == CONST_INT
7004 && INTVAL (index) < 1024
7005 && INTVAL (index) > -1024
7006 && (INTVAL (index) & 3) == 0);
7009 /* For quad modes, we restrict the constant offset to be slightly less
7010 than what the instruction format permits. We do this because for
7011 quad mode moves, we will actually decompose them into two separate
7012 double-mode reads or writes. INDEX must therefore be a valid
7013 (double-mode) offset and so should INDEX+8. */
7014 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7015 return (code == CONST_INT
7016 && INTVAL (index) < 1016
7017 && INTVAL (index) > -1024
7018 && (INTVAL (index) & 3) == 0);
7020 /* We have no such constraint on double mode offsets, so we permit the
7021 full range of the instruction format. */
7022 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7023 return (code == CONST_INT
7024 && INTVAL (index) < 1024
7025 && INTVAL (index) > -1024
7026 && (INTVAL (index) & 3) == 0);
7028 if (arm_address_register_rtx_p (index, strict_p)
7029 && (GET_MODE_SIZE (mode) <= 4))
7030 return 1;
7032 if (mode == DImode || mode == DFmode)
7034 if (code == CONST_INT)
7036 HOST_WIDE_INT val = INTVAL (index);
7037 /* ??? Can we assume ldrd for thumb2? */
7038 /* Thumb-2 ldrd only has reg+const addressing modes. */
7039 /* ldrd supports offsets of +-1020.
7040 However the ldr fallback does not. */
7041 return val > -256 && val < 256 && (val & 3) == 0;
7043 else
7044 return 0;
7047 if (code == MULT)
7049 rtx xiop0 = XEXP (index, 0);
7050 rtx xiop1 = XEXP (index, 1);
7052 return ((arm_address_register_rtx_p (xiop0, strict_p)
7053 && thumb2_index_mul_operand (xiop1))
7054 || (arm_address_register_rtx_p (xiop1, strict_p)
7055 && thumb2_index_mul_operand (xiop0)));
7057 else if (code == ASHIFT)
7059 rtx op = XEXP (index, 1);
7061 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7062 && CONST_INT_P (op)
7063 && INTVAL (op) > 0
7064 && INTVAL (op) <= 3);
7067 return (code == CONST_INT
7068 && INTVAL (index) < 4096
7069 && INTVAL (index) > -256);
7072 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7073 static int
7074 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7076 int regno;
7078 if (!REG_P (x))
7079 return 0;
7081 regno = REGNO (x);
7083 if (strict_p)
7084 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7086 return (regno <= LAST_LO_REGNUM
7087 || regno > LAST_VIRTUAL_REGISTER
7088 || regno == FRAME_POINTER_REGNUM
7089 || (GET_MODE_SIZE (mode) >= 4
7090 && (regno == STACK_POINTER_REGNUM
7091 || regno >= FIRST_PSEUDO_REGISTER
7092 || x == hard_frame_pointer_rtx
7093 || x == arg_pointer_rtx)));
7096 /* Return nonzero if x is a legitimate index register. This is the case
7097 for any base register that can access a QImode object. */
7098 inline static int
7099 thumb1_index_register_rtx_p (rtx x, int strict_p)
7101 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7104 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7106 The AP may be eliminated to either the SP or the FP, so we use the
7107 least common denominator, e.g. SImode, and offsets from 0 to 64.
7109 ??? Verify whether the above is the right approach.
7111 ??? Also, the FP may be eliminated to the SP, so perhaps that
7112 needs special handling also.
7114 ??? Look at how the mips16 port solves this problem. It probably uses
7115 better ways to solve some of these problems.
7117 Although it is not incorrect, we don't accept QImode and HImode
7118 addresses based on the frame pointer or arg pointer until the
7119 reload pass starts. This is so that eliminating such addresses
7120 into stack based ones won't produce impossible code. */
7121 int
7122 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7124 /* ??? Not clear if this is right. Experiment. */
7125 if (GET_MODE_SIZE (mode) < 4
7126 && !(reload_in_progress || reload_completed)
7127 && (reg_mentioned_p (frame_pointer_rtx, x)
7128 || reg_mentioned_p (arg_pointer_rtx, x)
7129 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7130 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7131 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7132 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7133 return 0;
7135 /* Accept any base register. SP only in SImode or larger. */
7136 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7137 return 1;
7139 /* This is PC relative data before arm_reorg runs. */
7140 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7141 && GET_CODE (x) == SYMBOL_REF
7142 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7143 return 1;
7145 /* This is PC relative data after arm_reorg runs. */
7146 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7147 && reload_completed
7148 && (GET_CODE (x) == LABEL_REF
7149 || (GET_CODE (x) == CONST
7150 && GET_CODE (XEXP (x, 0)) == PLUS
7151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7152 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7153 return 1;
7155 /* Post-inc indexing only supported for SImode and larger. */
7156 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7157 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7158 return 1;
7160 else if (GET_CODE (x) == PLUS)
7162 /* REG+REG address can be any two index registers. */
7163 /* We disallow FRAME+REG addressing since we know that FRAME
7164 will be replaced with STACK, and SP relative addressing only
7165 permits SP+OFFSET. */
7166 if (GET_MODE_SIZE (mode) <= 4
7167 && XEXP (x, 0) != frame_pointer_rtx
7168 && XEXP (x, 1) != frame_pointer_rtx
7169 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7170 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7171 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7172 return 1;
7174 /* REG+const has 5-7 bit offset for non-SP registers. */
7175 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7176 || XEXP (x, 0) == arg_pointer_rtx)
7177 && CONST_INT_P (XEXP (x, 1))
7178 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7179 return 1;
7181 /* REG+const has 10-bit offset for SP, but only SImode and
7182 larger is supported. */
7183 /* ??? Should probably check for DI/DFmode overflow here
7184 just like GO_IF_LEGITIMATE_OFFSET does. */
7185 else if (REG_P (XEXP (x, 0))
7186 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7187 && GET_MODE_SIZE (mode) >= 4
7188 && CONST_INT_P (XEXP (x, 1))
7189 && INTVAL (XEXP (x, 1)) >= 0
7190 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7192 return 1;
7194 else if (REG_P (XEXP (x, 0))
7195 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7196 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7197 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7198 && REGNO (XEXP (x, 0))
7199 <= LAST_VIRTUAL_POINTER_REGISTER))
7200 && GET_MODE_SIZE (mode) >= 4
7201 && CONST_INT_P (XEXP (x, 1))
7202 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7203 return 1;
7206 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7207 && GET_MODE_SIZE (mode) == 4
7208 && GET_CODE (x) == SYMBOL_REF
7209 && CONSTANT_POOL_ADDRESS_P (x)
7210 && ! (flag_pic
7211 && symbol_mentioned_p (get_pool_constant (x))
7212 && ! pcrel_constant_p (get_pool_constant (x))))
7213 return 1;
7215 return 0;
7218 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7219 instruction of mode MODE. */
7220 int
7221 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7223 switch (GET_MODE_SIZE (mode))
7225 case 1:
7226 return val >= 0 && val < 32;
7228 case 2:
7229 return val >= 0 && val < 64 && (val & 1) == 0;
7231 default:
7232 return (val >= 0
7233 && (val + GET_MODE_SIZE (mode)) <= 128
7234 && (val & 3) == 0);
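/* For Thumb-1, target-level accesses whose constant offsets sit just inside
   these limits (a sketch):

     char  load_b (char *p)  { return p[31]; }   // byte offsets 0..31
     short load_h (short *p) { return p[31]; }   // halfword offsets 0..62, even
     int   load_w (int *p)   { return p[31]; }   // word offsets 0..124, multiple of 4
*/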
7238 bool
7239 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7241 if (TARGET_ARM)
7242 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7243 else if (TARGET_THUMB2)
7244 return thumb2_legitimate_address_p (mode, x, strict_p);
7245 else /* if (TARGET_THUMB1) */
7246 return thumb1_legitimate_address_p (mode, x, strict_p);
7249 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7251 Given an rtx X being reloaded into a reg required to be
7252 in class CLASS, return the class of reg to actually use.
7253 In general this is just CLASS, but for the Thumb core registers and
7254 immediate constants we prefer a LO_REGS class or a subset. */
7256 static reg_class_t
7257 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7259 if (TARGET_32BIT)
7260 return rclass;
7261 else
7263 if (rclass == GENERAL_REGS)
7264 return LO_REGS;
7265 else
7266 return rclass;
7270 /* Build the SYMBOL_REF for __tls_get_addr. */
7272 static GTY(()) rtx tls_get_addr_libfunc;
7274 static rtx
7275 get_tls_get_addr (void)
7277 if (!tls_get_addr_libfunc)
7278 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7279 return tls_get_addr_libfunc;
7282 rtx
7283 arm_load_tp (rtx target)
7285 if (!target)
7286 target = gen_reg_rtx (SImode);
7288 if (TARGET_HARD_TP)
7290 /* Can return in any reg. */
7291 emit_insn (gen_load_tp_hard (target));
7293 else
7295 /* Always returned in r0. Immediately copy the result into a pseudo,
7296 otherwise other uses of r0 (e.g. setting up function arguments) may
7297 clobber the value. */
7299 rtx tmp;
7301 emit_insn (gen_load_tp_soft ());
7303 tmp = gen_rtx_REG (SImode, 0);
7304 emit_move_insn (target, tmp);
7306 return target;
7309 static rtx
7310 load_tls_operand (rtx x, rtx reg)
7312 rtx tmp;
7314 if (reg == NULL_RTX)
7315 reg = gen_reg_rtx (SImode);
7317 tmp = gen_rtx_CONST (SImode, x);
7319 emit_move_insn (reg, tmp);
7321 return reg;
7324 static rtx
7325 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7327 rtx insns, label, labelno, sum;
7329 gcc_assert (reloc != TLS_DESCSEQ);
7330 start_sequence ();
7332 labelno = GEN_INT (pic_labelno++);
7333 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7334 label = gen_rtx_CONST (VOIDmode, label);
7336 sum = gen_rtx_UNSPEC (Pmode,
7337 gen_rtvec (4, x, GEN_INT (reloc), label,
7338 GEN_INT (TARGET_ARM ? 8 : 4)),
7339 UNSPEC_TLS);
7340 reg = load_tls_operand (sum, reg);
7342 if (TARGET_ARM)
7343 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7344 else
7345 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7347 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7348 LCT_PURE, /* LCT_CONST? */
7349 Pmode, 1, reg, Pmode);
7351 insns = get_insns ();
7352 end_sequence ();
7354 return insns;
7357 static rtx
7358 arm_tls_descseq_addr (rtx x, rtx reg)
7360 rtx labelno = GEN_INT (pic_labelno++);
7361 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7362 rtx sum = gen_rtx_UNSPEC (Pmode,
7363 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7364 gen_rtx_CONST (VOIDmode, label),
7365 GEN_INT (!TARGET_ARM)),
7366 UNSPEC_TLS);
7367 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7369 emit_insn (gen_tlscall (x, labelno));
7370 if (!reg)
7371 reg = gen_reg_rtx (SImode);
7372 else
7373 gcc_assert (REGNO (reg) != 0);
7375 emit_move_insn (reg, reg0);
7377 return reg;
7380 rtx
7381 legitimize_tls_address (rtx x, rtx reg)
7383 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7384 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7386 switch (model)
7388 case TLS_MODEL_GLOBAL_DYNAMIC:
7389 if (TARGET_GNU2_TLS)
7391 reg = arm_tls_descseq_addr (x, reg);
7393 tp = arm_load_tp (NULL_RTX);
7395 dest = gen_rtx_PLUS (Pmode, tp, reg);
7397 else
7399 /* Original scheme */
7400 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7401 dest = gen_reg_rtx (Pmode);
7402 emit_libcall_block (insns, dest, ret, x);
7404 return dest;
7406 case TLS_MODEL_LOCAL_DYNAMIC:
7407 if (TARGET_GNU2_TLS)
7409 reg = arm_tls_descseq_addr (x, reg);
7411 tp = arm_load_tp (NULL_RTX);
7413 dest = gen_rtx_PLUS (Pmode, tp, reg);
7415 else
7417 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7419 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7420 share the LDM result with other LD model accesses. */
7421 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7422 UNSPEC_TLS);
7423 dest = gen_reg_rtx (Pmode);
7424 emit_libcall_block (insns, dest, ret, eqv);
7426 /* Load the addend. */
7427 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7428 GEN_INT (TLS_LDO32)),
7429 UNSPEC_TLS);
7430 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7431 dest = gen_rtx_PLUS (Pmode, dest, addend);
7433 return dest;
7435 case TLS_MODEL_INITIAL_EXEC:
7436 labelno = GEN_INT (pic_labelno++);
7437 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7438 label = gen_rtx_CONST (VOIDmode, label);
7439 sum = gen_rtx_UNSPEC (Pmode,
7440 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7441 GEN_INT (TARGET_ARM ? 8 : 4)),
7442 UNSPEC_TLS);
7443 reg = load_tls_operand (sum, reg);
7445 if (TARGET_ARM)
7446 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7447 else if (TARGET_THUMB2)
7448 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7449 else
7451 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7452 emit_move_insn (reg, gen_const_mem (SImode, reg));
7455 tp = arm_load_tp (NULL_RTX);
7457 return gen_rtx_PLUS (Pmode, tp, reg);
7459 case TLS_MODEL_LOCAL_EXEC:
7460 tp = arm_load_tp (NULL_RTX);
7462 reg = gen_rtx_UNSPEC (Pmode,
7463 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7464 UNSPEC_TLS);
7465 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7467 return gen_rtx_PLUS (Pmode, tp, reg);
7469 default:
7470 abort ();
7474 /* Try machine-dependent ways of modifying an illegitimate address
7475 to be legitimate. If we find one, return the new, valid address. */
7477 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7479 if (arm_tls_referenced_p (x))
7481 rtx addend = NULL;
7483 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7485 addend = XEXP (XEXP (x, 0), 1);
7486 x = XEXP (XEXP (x, 0), 0);
7489 if (GET_CODE (x) != SYMBOL_REF)
7490 return x;
7492 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7494 x = legitimize_tls_address (x, NULL_RTX);
7496 if (addend)
7498 x = gen_rtx_PLUS (SImode, x, addend);
7499 orig_x = x;
7501 else
7502 return x;
7505 if (!TARGET_ARM)
7507 /* TODO: legitimize_address for Thumb2. */
7508 if (TARGET_THUMB2)
7509 return x;
7510 return thumb_legitimize_address (x, orig_x, mode);
7513 if (GET_CODE (x) == PLUS)
7515 rtx xop0 = XEXP (x, 0);
7516 rtx xop1 = XEXP (x, 1);
7518 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7519 xop0 = force_reg (SImode, xop0);
7521 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7522 && !symbol_mentioned_p (xop1))
7523 xop1 = force_reg (SImode, xop1);
7525 if (ARM_BASE_REGISTER_RTX_P (xop0)
7526 && CONST_INT_P (xop1))
7528 HOST_WIDE_INT n, low_n;
7529 rtx base_reg, val;
7530 n = INTVAL (xop1);
7532 /* VFP addressing modes actually allow greater offsets, but for
7533 now we just stick with the lowest common denominator. */
7534 if (mode == DImode
7535 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7537 low_n = n & 0x0f;
7538 n &= ~0x0f;
7539 if (low_n > 4)
7541 n += 16;
7542 low_n -= 16;
7545 else
7547 low_n = ((mode) == TImode ? 0
7548 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7549 n -= low_n;
7552 base_reg = gen_reg_rtx (SImode);
7553 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7554 emit_move_insn (base_reg, val);
7555 x = plus_constant (Pmode, base_reg, low_n);
7557 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7558 x = gen_rtx_PLUS (SImode, xop0, xop1);
7561 /* XXX We don't allow MINUS any more -- see comment in
7562 arm_legitimate_address_outer_p (). */
7563 else if (GET_CODE (x) == MINUS)
7565 rtx xop0 = XEXP (x, 0);
7566 rtx xop1 = XEXP (x, 1);
7568 if (CONSTANT_P (xop0))
7569 xop0 = force_reg (SImode, xop0);
7571 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7572 xop1 = force_reg (SImode, xop1);
7574 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7575 x = gen_rtx_MINUS (SImode, xop0, xop1);
7578 /* Make sure to take full advantage of the pre-indexed addressing mode
7579 with absolute addresses which often allows for the base register to
7580 be factorized for multiple adjacent memory references, and it might
7581 even allow the minipool to be avoided entirely. */
7582 else if (CONST_INT_P (x) && optimize > 0)
7584 unsigned int bits;
7585 HOST_WIDE_INT mask, base, index;
7586 rtx base_reg;
7588 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7589 use an 8-bit index. So let's use a 12-bit index for SImode only and
7590 hope that arm_gen_constant will enable ldrb to use more bits. */
7591 bits = (mode == SImode) ? 12 : 8;
7592 mask = (1 << bits) - 1;
7593 base = INTVAL (x) & ~mask;
7594 index = INTVAL (x) & mask;
7595 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7597 /* It'll most probably be more efficient to generate the base
7598 with more bits set and use a negative index instead. */
7599 base |= mask;
7600 index -= mask;
7602 base_reg = force_reg (SImode, GEN_INT (base));
7603 x = plus_constant (Pmode, base_reg, index);
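/* Illustrative example (editorial; the constant is made up): for an SImode
access to absolute address 0x1234 we get bits == 12, mask == 0xfff,
base == 0x1000 and index == 0x234. bit_count (0x1000) is 1, well below the
(32 - bits)/2 threshold of 10, so the base is kept as is and the result is
(plus (reg holding 0x1000) 0x234); a neighbouring access to 0x1238 can then
share the same base register. */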
7606 if (flag_pic)
7608 /* We need to find and carefully transform any SYMBOL and LABEL
7609 references; so go back to the original address expression. */
7610 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7612 if (new_x != orig_x)
7613 x = new_x;
7616 return x;
7620 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7621 to be legitimate. If we find one, return the new, valid address. */
7623 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7625 if (GET_CODE (x) == PLUS
7626 && CONST_INT_P (XEXP (x, 1))
7627 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7628 || INTVAL (XEXP (x, 1)) < 0))
7630 rtx xop0 = XEXP (x, 0);
7631 rtx xop1 = XEXP (x, 1);
7632 HOST_WIDE_INT offset = INTVAL (xop1);
7634 /* Try to fold the offset into a bias of the base register and
7635 then offset from that. Don't do this when optimizing for space
7636 since it can cause too many CSEs. */
7637 if (optimize_size && offset >= 0
7638 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7640 HOST_WIDE_INT delta;
7642 if (offset >= 256)
7643 delta = offset - (256 - GET_MODE_SIZE (mode));
7644 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7645 delta = 31 * GET_MODE_SIZE (mode);
7646 else
7647 delta = offset & (~31 * GET_MODE_SIZE (mode));
7649 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7650 NULL_RTX);
7651 x = plus_constant (Pmode, xop0, delta);
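/* Illustrative example (editorial; the offset is made up): with SImode
(size 4) and offset == 260, the offset >= 256 branch above computes
delta = 260 - (256 - 4) = 8, so the base register is biased by 252 and the
memory access itself uses the small offset #8, which the Thumb-1 load/store
encodings can express directly. */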
7653 else if (offset < 0 && offset > -256)
7654 /* Small negative offsets are best done with a subtract before the
7655 dereference; forcing these into a register normally takes two
7656 instructions. */
7657 x = force_operand (x, NULL_RTX);
7658 else
7660 /* For the remaining cases, force the constant into a register. */
7661 xop1 = force_reg (SImode, xop1);
7662 x = gen_rtx_PLUS (SImode, xop0, xop1);
7665 else if (GET_CODE (x) == PLUS
7666 && s_register_operand (XEXP (x, 1), SImode)
7667 && !s_register_operand (XEXP (x, 0), SImode))
7669 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7671 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7674 if (flag_pic)
7676 /* We need to find and carefully transform any SYMBOL and LABEL
7677 references; so go back to the original address expression. */
7678 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7680 if (new_x != orig_x)
7681 x = new_x;
7684 return x;
7687 bool
7688 arm_legitimize_reload_address (rtx *p,
7689 enum machine_mode mode,
7690 int opnum, int type,
7691 int ind_levels ATTRIBUTE_UNUSED)
7693 /* We must recognize output that we have already generated ourselves. */
7694 if (GET_CODE (*p) == PLUS
7695 && GET_CODE (XEXP (*p, 0)) == PLUS
7696 && REG_P (XEXP (XEXP (*p, 0), 0))
7697 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7698 && CONST_INT_P (XEXP (*p, 1)))
7700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7701 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7702 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7703 return true;
7706 if (GET_CODE (*p) == PLUS
7707 && REG_P (XEXP (*p, 0))
7708 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7709 /* If the base register is equivalent to a constant, let the generic
7710 code handle it. Otherwise we will run into problems if a future
7711 reload pass decides to rematerialize the constant. */
7712 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7713 && CONST_INT_P (XEXP (*p, 1)))
7715 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7716 HOST_WIDE_INT low, high;
7718 /* Detect coprocessor load/stores. */
7719 bool coproc_p = ((TARGET_HARD_FLOAT
7720 && TARGET_VFP
7721 && (mode == SFmode || mode == DFmode))
7722 || (TARGET_REALLY_IWMMXT
7723 && VALID_IWMMXT_REG_MODE (mode))
7724 || (TARGET_NEON
7725 && (VALID_NEON_DREG_MODE (mode)
7726 || VALID_NEON_QREG_MODE (mode))));
7728 /* In some cases, bail out when the lower two bits of the offset are nonzero. */
7729 if ((val & 0x3) != 0
7730 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7731 && (coproc_p
7732 /* For DI, and DF under soft-float: */
7733 || ((mode == DImode || mode == DFmode)
7734 /* Without ldrd, we use stm/ldm, which does not
7735 fare well with unaligned bits. */
7736 && (! TARGET_LDRD
7737 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7738 || TARGET_THUMB2))))
7739 return false;
7741 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7742 where the (reg+high) part gets turned into a reload add insn,
7743 we try to decompose the index into high/low values that can often
7744 also lead to better reload CSE.
7745 For example:
7746 ldr r0, [r2, #4100] // Offset too large
7747 ldr r1, [r2, #4104] // Offset too large
7749 is best reloaded as:
7750 add t1, r2, #4096
7751 ldr r0, [t1, #4]
7752 add t2, r2, #4096
7753 ldr r1, [t2, #8]
7755 which post-reload CSE can simplify in most cases to eliminate the
7756 second add instruction:
7757 add t1, r2, #4096
7758 ldr r0, [t1, #4]
7759 ldr r1, [t1, #8]
7761 The idea here is that we want to split out the bits of the constant
7762 as a mask, rather than as subtracting the maximum offset that the
7763 respective type of load/store used can handle.
7765 A negative low offset can still be used even when the overall offset
7766 is positive; sometimes this yields a high-part immediate that can be
7767 constructed with fewer instructions.
7768 For example:
7769 ldr r0, [r2, #0x3FFFFC]
7771 This is best reloaded as:
7772 add t1, r2, #0x400000
7773 ldr r0, [t1, #-4]
7775 The trick for spotting this for a load insn with N bits of offset
7776 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7777 negative offset that is going to make bit N and all the bits below
7778 it become zero in the remainder part.
7780 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7781 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7782 used in most cases of ARM load/store instructions. */
7784 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7785 (((VAL) & ((1 << (N)) - 1)) \
7786 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7787 : 0)
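/* Worked example (editorial, using the 0x3FFFFC case from the comment above):
with N == 12, the low twelve bits of 0x3FFFFC are nonzero, so the macro
yields ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000 == -4; the code below then
derives high == 0x400000, giving exactly the add/ldr pair shown above. */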
7789 if (coproc_p)
7791 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7793 /* NEON quad-word load/stores are made of two double-word accesses,
7794 so the valid index range is reduced by 8. Treat as 9-bit range if
7795 we go over it. */
7796 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7797 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7799 else if (GET_MODE_SIZE (mode) == 8)
7801 if (TARGET_LDRD)
7802 low = (TARGET_THUMB2
7803 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7804 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7805 else
7806 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7807 to access doublewords. The supported load/store offsets are
7808 -8, -4, and 4, which we try to produce here. */
7809 low = ((val & 0xf) ^ 0x8) - 0x8;
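/* Illustrative example (editorial; the value is made up): for val == 0x10c
this computes low = ((0xc ^ 0x8) - 0x8) == -4, so high becomes 0x110 and the
doubleword is accessed at offset -4 from the reloaded base, one of the
offsets listed above. */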
7811 else if (GET_MODE_SIZE (mode) < 8)
7813 /* NEON element load/stores do not have an offset. */
7814 if (TARGET_NEON_FP16 && mode == HFmode)
7815 return false;
7817 if (TARGET_THUMB2)
7819 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7820 Try the wider 12-bit range first, and re-try if the result
7821 is out of range. */
7822 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7823 if (low < -255)
7824 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
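/* Illustrative example (editorial; the value is made up): for val == 0x1010
the 12-bit attempt gives low == -4080, which is below -255, so the 8-bit
retry is used instead and yields low == 0x10 with high == 0x1000. */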
7826 else
7828 if (mode == HImode || mode == HFmode)
7830 if (arm_arch4)
7831 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7832 else
7834 /* The storehi/movhi_bytes fallbacks can use only
7835 [-4094,+4094] of the full ldrb/strb index range. */
7836 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7837 if (low == 4095 || low == -4095)
7838 return false;
7841 else
7842 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7845 else
7846 return false;
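/* Editorial note: the expression below reduces (val - low) modulo 2^32 and
sign-extends the result, so that on hosts where HOST_WIDE_INT is wider than
32 bits any wrap-around is caught by the "high + low != val" check that
follows. */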
7848 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7849 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7850 - (unsigned HOST_WIDE_INT) 0x80000000);
7851 /* Check for overflow or zero */
7852 if (low == 0 || high == 0 || (high + low != val))
7853 return false;
7855 /* Reload the high part into a base reg; leave the low part
7856 in the mem.
7857 Note that replacing this gen_rtx_PLUS with plus_constant is
7858 wrong in this case because we rely on the
7859 (plus (plus reg c1) c2) structure being preserved so that
7860 XEXP (*p, 0) in push_reload below uses the correct term. */
7861 *p = gen_rtx_PLUS (GET_MODE (*p),
7862 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7863 GEN_INT (high)),
7864 GEN_INT (low));
7865 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7866 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7867 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7868 return true;
7871 return false;
7875 thumb_legitimize_reload_address (rtx *x_p,
7876 enum machine_mode mode,
7877 int opnum, int type,
7878 int ind_levels ATTRIBUTE_UNUSED)
7880 rtx x = *x_p;
7882 if (GET_CODE (x) == PLUS
7883 && GET_MODE_SIZE (mode) < 4
7884 && REG_P (XEXP (x, 0))
7885 && XEXP (x, 0) == stack_pointer_rtx
7886 && CONST_INT_P (XEXP (x, 1))
7887 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7889 rtx orig_x = x;
7891 x = copy_rtx (x);
7892 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7893 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7894 return x;
7897 /* If both registers are hi-regs, then it's better to reload the
7898 entire expression rather than each register individually. That
7899 only requires one reload register rather than two. */
7900 if (GET_CODE (x) == PLUS
7901 && REG_P (XEXP (x, 0))
7902 && REG_P (XEXP (x, 1))
7903 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7904 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7906 rtx orig_x = x;
7908 x = copy_rtx (x);
7909 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7910 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7911 return x;
7914 return NULL;
7917 /* Test for various thread-local symbols. */
7919 /* Helper for arm_tls_referenced_p. */
7921 static int
7922 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7924 if (GET_CODE (*x) == SYMBOL_REF)
7925 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7927 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7928 TLS offsets, not real symbol references. */
7929 if (GET_CODE (*x) == UNSPEC
7930 && XINT (*x, 1) == UNSPEC_TLS)
7931 return -1;
7933 return 0;
7936 /* Return TRUE if X contains any TLS symbol references. */
7938 bool
7939 arm_tls_referenced_p (rtx x)
7941 if (! TARGET_HAVE_TLS)
7942 return false;
7944 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7947 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7949 On the ARM, allow any integer (invalid ones are removed later by insn
7950 patterns), nice doubles and symbol_refs which refer to the function's
7951 constant pool XXX.
7953 When generating PIC, allow anything. */
7955 static bool
7956 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7958 /* At present, we have no support for Neon structure constants, so forbid
7959 them here. It might be possible to handle simple cases like 0 and -1
7960 in future. */
7961 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7962 return false;
7964 return flag_pic || !label_mentioned_p (x);
7967 static bool
7968 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7970 return (CONST_INT_P (x)
7971 || CONST_DOUBLE_P (x)
7972 || CONSTANT_ADDRESS_P (x)
7973 || flag_pic);
7976 static bool
7977 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7979 return (!arm_cannot_force_const_mem (mode, x)
7980 && (TARGET_32BIT
7981 ? arm_legitimate_constant_p_1 (mode, x)
7982 : thumb_legitimate_constant_p (mode, x)));
7985 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7987 static bool
7988 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7990 rtx base, offset;
7992 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7994 split_const (x, &base, &offset);
7995 if (GET_CODE (base) == SYMBOL_REF
7996 && !offset_within_block_p (base, INTVAL (offset)))
7997 return true;
7999 return arm_tls_referenced_p (x);
8002 #define REG_OR_SUBREG_REG(X) \
8003 (REG_P (X) \
8004 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8006 #define REG_OR_SUBREG_RTX(X) \
8007 (REG_P (X) ? (X) : SUBREG_REG (X))
8009 static inline int
8010 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8012 enum machine_mode mode = GET_MODE (x);
8013 int total, words;
8015 switch (code)
8017 case ASHIFT:
8018 case ASHIFTRT:
8019 case LSHIFTRT:
8020 case ROTATERT:
8021 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8023 case PLUS:
8024 case MINUS:
8025 case COMPARE:
8026 case NEG:
8027 case NOT:
8028 return COSTS_N_INSNS (1);
8030 case MULT:
8031 if (CONST_INT_P (XEXP (x, 1)))
8033 int cycles = 0;
8034 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8036 while (i)
8038 i >>= 2;
8039 cycles++;
8041 return COSTS_N_INSNS (2) + cycles;
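/* Illustrative example (editorial; the constant is made up): for a multiply
by 100 the loop above shifts 0x64 -> 0x19 -> 0x6 -> 0x1 -> 0 in four
iterations, so the cost is COSTS_N_INSNS (2) + 4; the loop charges one extra
unit per two bits of the constant, presumably mimicking an early-terminating
multiply. */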
8043 return COSTS_N_INSNS (1) + 16;
8045 case SET:
8046 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8047 the mode. */
8048 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8049 return (COSTS_N_INSNS (words)
8050 + 4 * ((MEM_P (SET_SRC (x)))
8051 + MEM_P (SET_DEST (x))));
8053 case CONST_INT:
8054 if (outer == SET)
8056 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8057 return 0;
8058 if (thumb_shiftable_const (INTVAL (x)))
8059 return COSTS_N_INSNS (2);
8060 return COSTS_N_INSNS (3);
8062 else if ((outer == PLUS || outer == COMPARE)
8063 && INTVAL (x) < 256 && INTVAL (x) > -256)
8064 return 0;
8065 else if ((outer == IOR || outer == XOR || outer == AND)
8066 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8067 return COSTS_N_INSNS (1);
8068 else if (outer == AND)
8070 int i;
8071 /* This duplicates the tests in the andsi3 expander. */
8072 for (i = 9; i <= 31; i++)
8073 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8074 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8075 return COSTS_N_INSNS (2);
8077 else if (outer == ASHIFT || outer == ASHIFTRT
8078 || outer == LSHIFTRT)
8079 return 0;
8080 return COSTS_N_INSNS (2);
8082 case CONST:
8083 case CONST_DOUBLE:
8084 case LABEL_REF:
8085 case SYMBOL_REF:
8086 return COSTS_N_INSNS (3);
8088 case UDIV:
8089 case UMOD:
8090 case DIV:
8091 case MOD:
8092 return 100;
8094 case TRUNCATE:
8095 return 99;
8097 case AND:
8098 case XOR:
8099 case IOR:
8100 /* XXX guess. */
8101 return 8;
8103 case MEM:
8104 /* XXX another guess. */
8105 /* Memory costs quite a lot for the first word, but subsequent words
8106 load at the equivalent of a single insn each. */
8107 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8108 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8109 ? 4 : 0));
8111 case IF_THEN_ELSE:
8112 /* XXX a guess. */
8113 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8114 return 14;
8115 return 2;
8117 case SIGN_EXTEND:
8118 case ZERO_EXTEND:
8119 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8120 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8122 if (mode == SImode)
8123 return total;
8125 if (arm_arch6)
8126 return total + COSTS_N_INSNS (1);
8128 /* Assume a two-shift sequence. Increase the cost slightly so
8129 we prefer actual shifts over an extend operation. */
8130 return total + 1 + COSTS_N_INSNS (2);
8132 default:
8133 return 99;
8137 static inline bool
8138 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8140 enum machine_mode mode = GET_MODE (x);
8141 enum rtx_code subcode;
8142 rtx operand;
8143 enum rtx_code code = GET_CODE (x);
8144 *total = 0;
8146 switch (code)
8148 case MEM:
8149 /* Memory costs quite a lot for the first word, but subsequent words
8150 load at the equivalent of a single insn each. */
8151 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8152 return true;
8154 case DIV:
8155 case MOD:
8156 case UDIV:
8157 case UMOD:
8158 if (TARGET_HARD_FLOAT && mode == SFmode)
8159 *total = COSTS_N_INSNS (2);
8160 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8161 *total = COSTS_N_INSNS (4);
8162 else
8163 *total = COSTS_N_INSNS (20);
8164 return false;
8166 case ROTATE:
8167 if (REG_P (XEXP (x, 1)))
8168 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8169 else if (!CONST_INT_P (XEXP (x, 1)))
8170 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8172 /* Fall through */
8173 case ROTATERT:
8174 if (mode != SImode)
8176 *total += COSTS_N_INSNS (4);
8177 return true;
8180 /* Fall through */
8181 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8182 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8183 if (mode == DImode)
8185 *total += COSTS_N_INSNS (3);
8186 return true;
8189 *total += COSTS_N_INSNS (1);
8190 /* Increase the cost of complex shifts because they aren't any faster,
8191 and reduce dual issue opportunities. */
8192 if (arm_tune_cortex_a9
8193 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8194 ++*total;
8196 return true;
8198 case MINUS:
8199 if (mode == DImode)
8201 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8202 if (CONST_INT_P (XEXP (x, 0))
8203 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8205 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8206 return true;
8209 if (CONST_INT_P (XEXP (x, 1))
8210 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8212 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8213 return true;
8216 return false;
8219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8221 if (TARGET_HARD_FLOAT
8222 && (mode == SFmode
8223 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8225 *total = COSTS_N_INSNS (1);
8226 if (CONST_DOUBLE_P (XEXP (x, 0))
8227 && arm_const_double_rtx (XEXP (x, 0)))
8229 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8230 return true;
8233 if (CONST_DOUBLE_P (XEXP (x, 1))
8234 && arm_const_double_rtx (XEXP (x, 1)))
8236 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8237 return true;
8240 return false;
8242 *total = COSTS_N_INSNS (20);
8243 return false;
8246 *total = COSTS_N_INSNS (1);
8247 if (CONST_INT_P (XEXP (x, 0))
8248 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8250 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8251 return true;
8254 subcode = GET_CODE (XEXP (x, 1));
8255 if (subcode == ASHIFT || subcode == ASHIFTRT
8256 || subcode == LSHIFTRT
8257 || subcode == ROTATE || subcode == ROTATERT)
8259 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8260 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8261 return true;
8264 /* A shift as a part of RSB costs no more than RSB itself. */
8265 if (GET_CODE (XEXP (x, 0)) == MULT
8266 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8268 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8269 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8270 return true;
8273 if (subcode == MULT
8274 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8276 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8277 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8278 return true;
8281 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8282 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8284 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8285 if (REG_P (XEXP (XEXP (x, 1), 0))
8286 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8287 *total += COSTS_N_INSNS (1);
8289 return true;
8292 /* Fall through */
8294 case PLUS:
8295 if (code == PLUS && arm_arch6 && mode == SImode
8296 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8297 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8299 *total = COSTS_N_INSNS (1);
8300 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8301 0, speed);
8302 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8303 return true;
8306 /* MLA: All arguments must be registers. We filter out
8307 multiplication by a power of two, so that we fall down into
8308 the code below. */
8309 if (GET_CODE (XEXP (x, 0)) == MULT
8310 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8312 /* The cost comes from the cost of the multiply. */
8313 return false;
8316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8318 if (TARGET_HARD_FLOAT
8319 && (mode == SFmode
8320 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8322 *total = COSTS_N_INSNS (1);
8323 if (CONST_DOUBLE_P (XEXP (x, 1))
8324 && arm_const_double_rtx (XEXP (x, 1)))
8326 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8327 return true;
8330 return false;
8333 *total = COSTS_N_INSNS (20);
8334 return false;
8337 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8338 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8340 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8341 if (REG_P (XEXP (XEXP (x, 0), 0))
8342 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8343 *total += COSTS_N_INSNS (1);
8344 return true;
8347 /* Fall through */
8349 case AND: case XOR: case IOR:
8351 /* Normally the frame registers will be split into reg+const during
8352 reload, so it is a bad idea to combine them with other instructions,
8353 since then they might not be moved outside of loops. As a compromise
8354 we allow integration with ops that have a constant as their second
8355 operand. */
8356 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8357 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8358 && !CONST_INT_P (XEXP (x, 1)))
8359 *total = COSTS_N_INSNS (1);
8361 if (mode == DImode)
8363 *total += COSTS_N_INSNS (2);
8364 if (CONST_INT_P (XEXP (x, 1))
8365 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8367 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8368 return true;
8371 return false;
8374 *total += COSTS_N_INSNS (1);
8375 if (CONST_INT_P (XEXP (x, 1))
8376 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8378 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8379 return true;
8381 subcode = GET_CODE (XEXP (x, 0));
8382 if (subcode == ASHIFT || subcode == ASHIFTRT
8383 || subcode == LSHIFTRT
8384 || subcode == ROTATE || subcode == ROTATERT)
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8388 return true;
8391 if (subcode == MULT
8392 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8394 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8396 return true;
8399 if (subcode == UMIN || subcode == UMAX
8400 || subcode == SMIN || subcode == SMAX)
8402 *total = COSTS_N_INSNS (3);
8403 return true;
8406 return false;
8408 case MULT:
8409 /* This should have been handled by the CPU specific routines. */
8410 gcc_unreachable ();
8412 case TRUNCATE:
8413 if (arm_arch3m && mode == SImode
8414 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8416 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8417 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8418 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8419 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8421 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8422 return true;
8424 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8425 return false;
8427 case NEG:
8428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8430 if (TARGET_HARD_FLOAT
8431 && (mode == SFmode
8432 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8434 *total = COSTS_N_INSNS (1);
8435 return false;
8437 *total = COSTS_N_INSNS (2);
8438 return false;
8441 /* Fall through */
8442 case NOT:
8443 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8444 if (mode == SImode && code == NOT)
8446 subcode = GET_CODE (XEXP (x, 0));
8447 if (subcode == ASHIFT || subcode == ASHIFTRT
8448 || subcode == LSHIFTRT
8449 || subcode == ROTATE || subcode == ROTATERT
8450 || (subcode == MULT
8451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8453 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8454 /* Register shifts cost an extra cycle. */
8455 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8456 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8457 subcode, 1, speed);
8458 return true;
8462 return false;
8464 case IF_THEN_ELSE:
8465 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8467 *total = COSTS_N_INSNS (4);
8468 return true;
8471 operand = XEXP (x, 0);
8473 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8475 && REG_P (XEXP (operand, 0))
8476 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8477 *total += COSTS_N_INSNS (1);
8478 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8479 + rtx_cost (XEXP (x, 2), code, 2, speed));
8480 return true;
8482 case NE:
8483 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8485 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8486 return true;
8488 goto scc_insn;
8490 case GE:
8491 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8492 && mode == SImode && XEXP (x, 1) == const0_rtx)
8494 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8495 return true;
8497 goto scc_insn;
8499 case LT:
8500 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8501 && mode == SImode && XEXP (x, 1) == const0_rtx)
8503 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8504 return true;
8506 goto scc_insn;
8508 case EQ:
8509 case GT:
8510 case LE:
8511 case GEU:
8512 case LTU:
8513 case GTU:
8514 case LEU:
8515 case UNORDERED:
8516 case ORDERED:
8517 case UNEQ:
8518 case UNGE:
8519 case UNLT:
8520 case UNGT:
8521 case UNLE:
8522 scc_insn:
8523 /* SCC insns. In the case where the comparison has already been
8524 performed, then they cost 2 instructions. Otherwise they need
8525 an additional comparison before them. */
8526 *total = COSTS_N_INSNS (2);
8527 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8529 return true;
8532 /* Fall through */
8533 case COMPARE:
8534 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8536 *total = 0;
8537 return true;
8540 *total += COSTS_N_INSNS (1);
8541 if (CONST_INT_P (XEXP (x, 1))
8542 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8544 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8545 return true;
8548 subcode = GET_CODE (XEXP (x, 0));
8549 if (subcode == ASHIFT || subcode == ASHIFTRT
8550 || subcode == LSHIFTRT
8551 || subcode == ROTATE || subcode == ROTATERT)
8553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8555 return true;
8558 if (subcode == MULT
8559 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8561 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8562 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8563 return true;
8566 return false;
8568 case UMIN:
8569 case UMAX:
8570 case SMIN:
8571 case SMAX:
8572 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8573 if (!CONST_INT_P (XEXP (x, 1))
8574 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8575 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8576 return true;
8578 case ABS:
8579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8581 if (TARGET_HARD_FLOAT
8582 && (mode == SFmode
8583 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8585 *total = COSTS_N_INSNS (1);
8586 return false;
8588 *total = COSTS_N_INSNS (20);
8589 return false;
8591 *total = COSTS_N_INSNS (1);
8592 if (mode == DImode)
8593 *total += COSTS_N_INSNS (3);
8594 return false;
8596 case SIGN_EXTEND:
8597 case ZERO_EXTEND:
8598 *total = 0;
8599 if (GET_MODE_CLASS (mode) == MODE_INT)
8601 rtx op = XEXP (x, 0);
8602 enum machine_mode opmode = GET_MODE (op);
8604 if (mode == DImode)
8605 *total += COSTS_N_INSNS (1);
8607 if (opmode != SImode)
8609 if (MEM_P (op))
8611 /* If !arm_arch4, we use one of the extendhisi2_mem
8612 or movhi_bytes patterns for HImode. For a QImode
8613 sign extension, we first zero-extend from memory
8614 and then perform a shift sequence. */
8615 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8616 *total += COSTS_N_INSNS (2);
8618 else if (arm_arch6)
8619 *total += COSTS_N_INSNS (1);
8621 /* We don't have the necessary insn, so we need to perform some
8622 other operation. */
8623 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8624 /* An and with constant 255. */
8625 *total += COSTS_N_INSNS (1);
8626 else
8627 /* A shift sequence. Increase costs slightly to avoid
8628 combining two shifts into an extend operation. */
8629 *total += COSTS_N_INSNS (2) + 1;
8632 return false;
8635 switch (GET_MODE (XEXP (x, 0)))
8637 case V8QImode:
8638 case V4HImode:
8639 case V2SImode:
8640 case V4QImode:
8641 case V2HImode:
8642 *total = COSTS_N_INSNS (1);
8643 return false;
8645 default:
8646 gcc_unreachable ();
8648 gcc_unreachable ();
8650 case ZERO_EXTRACT:
8651 case SIGN_EXTRACT:
8652 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8653 return true;
8655 case CONST_INT:
8656 if (const_ok_for_arm (INTVAL (x))
8657 || const_ok_for_arm (~INTVAL (x)))
8658 *total = COSTS_N_INSNS (1);
8659 else
8660 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8661 INTVAL (x), NULL_RTX,
8662 NULL_RTX, 0, 0));
8663 return true;
8665 case CONST:
8666 case LABEL_REF:
8667 case SYMBOL_REF:
8668 *total = COSTS_N_INSNS (3);
8669 return true;
8671 case HIGH:
8672 *total = COSTS_N_INSNS (1);
8673 return true;
8675 case LO_SUM:
8676 *total = COSTS_N_INSNS (1);
8677 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8678 return true;
8680 case CONST_DOUBLE:
8681 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8682 && (mode == SFmode || !TARGET_VFP_SINGLE))
8683 *total = COSTS_N_INSNS (1);
8684 else
8685 *total = COSTS_N_INSNS (4);
8686 return true;
8688 case SET:
8689 /* The vec_extract patterns accept memory operands that require an
8690 address reload. Account for the cost of that reload to give the
8691 auto-inc-dec pass an incentive to try to replace them. */
8692 if (TARGET_NEON && MEM_P (SET_DEST (x))
8693 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8695 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8696 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8697 *total += COSTS_N_INSNS (1);
8698 return true;
8700 /* Likewise for the vec_set patterns. */
8701 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8702 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8703 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8705 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8706 *total = rtx_cost (mem, code, 0, speed);
8707 if (!neon_vector_mem_operand (mem, 2, true))
8708 *total += COSTS_N_INSNS (1);
8709 return true;
8711 return false;
8713 case UNSPEC:
8714 /* We cost this as high as our memory costs to allow this to
8715 be hoisted from loops. */
8716 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8718 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8720 return true;
8722 case CONST_VECTOR:
8723 if (TARGET_NEON
8724 && TARGET_HARD_FLOAT
8725 && outer == SET
8726 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8727 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8728 *total = COSTS_N_INSNS (1);
8729 else
8730 *total = COSTS_N_INSNS (4);
8731 return true;
8733 default:
8734 *total = COSTS_N_INSNS (4);
8735 return false;
8739 /* Estimates the size cost of thumb1 instructions.
8740 For now most of the code is copied from thumb1_rtx_costs. We need more
8741 fine-grained tuning when we have more related test cases.
8742 static inline int
8743 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8745 enum machine_mode mode = GET_MODE (x);
8746 int words;
8748 switch (code)
8750 case ASHIFT:
8751 case ASHIFTRT:
8752 case LSHIFTRT:
8753 case ROTATERT:
8754 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8756 case PLUS:
8757 case MINUS:
8758 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8759 patterns produced by RTL expansion, especially for the expansion of
8760 multiplication. */
8761 if ((GET_CODE (XEXP (x, 0)) == MULT
8762 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8763 || (GET_CODE (XEXP (x, 1)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8765 return COSTS_N_INSNS (2);
8766 /* Deliberately fall through for a normal RTX. */
8767 case COMPARE:
8768 case NEG:
8769 case NOT:
8770 return COSTS_N_INSNS (1);
8772 case MULT:
8773 if (CONST_INT_P (XEXP (x, 1)))
8775 /* The Thumb-1 mul instruction can't operate on a constant; we must
8776 load it into a register first. */
8777 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8778 return COSTS_N_INSNS (1) + const_size;
8780 return COSTS_N_INSNS (1);
8782 case SET:
8783 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8784 the mode. */
8785 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8786 return (COSTS_N_INSNS (words)
8787 + 4 * ((MEM_P (SET_SRC (x)))
8788 + MEM_P (SET_DEST (x))));
8790 case CONST_INT:
8791 if (outer == SET)
8793 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8794 return COSTS_N_INSNS (1);
8795 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8796 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8797 return COSTS_N_INSNS (2);
8798 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8799 if (thumb_shiftable_const (INTVAL (x)))
8800 return COSTS_N_INSNS (2);
8801 return COSTS_N_INSNS (3);
8803 else if ((outer == PLUS || outer == COMPARE)
8804 && INTVAL (x) < 256 && INTVAL (x) > -256)
8805 return 0;
8806 else if ((outer == IOR || outer == XOR || outer == AND)
8807 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8808 return COSTS_N_INSNS (1);
8809 else if (outer == AND)
8811 int i;
8812 /* This duplicates the tests in the andsi3 expander. */
8813 for (i = 9; i <= 31; i++)
8814 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8815 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8816 return COSTS_N_INSNS (2);
8818 else if (outer == ASHIFT || outer == ASHIFTRT
8819 || outer == LSHIFTRT)
8820 return 0;
8821 return COSTS_N_INSNS (2);
8823 case CONST:
8824 case CONST_DOUBLE:
8825 case LABEL_REF:
8826 case SYMBOL_REF:
8827 return COSTS_N_INSNS (3);
8829 case UDIV:
8830 case UMOD:
8831 case DIV:
8832 case MOD:
8833 return 100;
8835 case TRUNCATE:
8836 return 99;
8838 case AND:
8839 case XOR:
8840 case IOR:
8841 /* XXX guess. */
8842 return 8;
8844 case MEM:
8845 /* XXX another guess. */
8846 /* Memory costs quite a lot for the first word, but subsequent words
8847 load at the equivalent of a single insn each. */
8848 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8849 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8850 ? 4 : 0));
8852 case IF_THEN_ELSE:
8853 /* XXX a guess. */
8854 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8855 return 14;
8856 return 2;
8858 case ZERO_EXTEND:
8859 /* XXX still guessing. */
8860 switch (GET_MODE (XEXP (x, 0)))
8862 case QImode:
8863 return (1 + (mode == DImode ? 4 : 0)
8864 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8866 case HImode:
8867 return (4 + (mode == DImode ? 4 : 0)
8868 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8870 case SImode:
8871 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8873 default:
8874 return 99;
8877 default:
8878 return 99;
8882 /* RTX costs when optimizing for size. */
8883 static bool
8884 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8885 int *total)
8887 enum machine_mode mode = GET_MODE (x);
8888 if (TARGET_THUMB1)
8890 *total = thumb1_size_rtx_costs (x, code, outer_code);
8891 return true;
8894 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8895 switch (code)
8897 case MEM:
8898 /* A memory access costs 1 insn if the mode is small, or the address is
8899 a single register, otherwise it costs one insn per word. */
8900 if (REG_P (XEXP (x, 0)))
8901 *total = COSTS_N_INSNS (1);
8902 else if (flag_pic
8903 && GET_CODE (XEXP (x, 0)) == PLUS
8904 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8905 /* This will be split into two instructions.
8906 See arm.md:calculate_pic_address. */
8907 *total = COSTS_N_INSNS (2);
8908 else
8909 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8910 return true;
8912 case DIV:
8913 case MOD:
8914 case UDIV:
8915 case UMOD:
8916 /* Needs a libcall, so it costs about this. */
8917 *total = COSTS_N_INSNS (2);
8918 return false;
8920 case ROTATE:
8921 if (mode == SImode && REG_P (XEXP (x, 1)))
8923 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8924 return true;
8926 /* Fall through */
8927 case ROTATERT:
8928 case ASHIFT:
8929 case LSHIFTRT:
8930 case ASHIFTRT:
8931 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8933 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8934 return true;
8936 else if (mode == SImode)
8938 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8939 /* Slightly disparage register shifts, but not by much. */
8940 if (!CONST_INT_P (XEXP (x, 1)))
8941 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8942 return true;
8945 /* Needs a libcall. */
8946 *total = COSTS_N_INSNS (2);
8947 return false;
8949 case MINUS:
8950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8951 && (mode == SFmode || !TARGET_VFP_SINGLE))
8953 *total = COSTS_N_INSNS (1);
8954 return false;
8957 if (mode == SImode)
8959 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8960 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8962 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8963 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8964 || subcode1 == ROTATE || subcode1 == ROTATERT
8965 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8966 || subcode1 == ASHIFTRT)
8968 /* It's just the cost of the two operands. */
8969 *total = 0;
8970 return false;
8973 *total = COSTS_N_INSNS (1);
8974 return false;
8977 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8978 return false;
8980 case PLUS:
8981 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8982 && (mode == SFmode || !TARGET_VFP_SINGLE))
8984 *total = COSTS_N_INSNS (1);
8985 return false;
8988 /* A shift as a part of ADD costs nothing. */
8989 if (GET_CODE (XEXP (x, 0)) == MULT
8990 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8992 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8994 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8995 return true;
8998 /* Fall through */
8999 case AND: case XOR: case IOR:
9000 if (mode == SImode)
9002 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9004 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9005 || subcode == LSHIFTRT || subcode == ASHIFTRT
9006 || (code == AND && subcode == NOT))
9008 /* It's just the cost of the two operands. */
9009 *total = 0;
9010 return false;
9014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9015 return false;
9017 case MULT:
9018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9019 return false;
9021 case NEG:
9022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9023 && (mode == SFmode || !TARGET_VFP_SINGLE))
9025 *total = COSTS_N_INSNS (1);
9026 return false;
9029 /* Fall through */
9030 case NOT:
9031 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9033 return false;
9035 case IF_THEN_ELSE:
9036 *total = 0;
9037 return false;
9039 case COMPARE:
9040 if (cc_register (XEXP (x, 0), VOIDmode))
9041 *total = 0;
9042 else
9043 *total = COSTS_N_INSNS (1);
9044 return false;
9046 case ABS:
9047 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9048 && (mode == SFmode || !TARGET_VFP_SINGLE))
9049 *total = COSTS_N_INSNS (1);
9050 else
9051 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9052 return false;
9054 case SIGN_EXTEND:
9055 case ZERO_EXTEND:
9056 return arm_rtx_costs_1 (x, outer_code, total, 0);
9058 case CONST_INT:
9059 if (const_ok_for_arm (INTVAL (x)))
9060 /* A multiplication by a constant requires another instruction
9061 to load the constant to a register. */
9062 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9063 ? 1 : 0);
9064 else if (const_ok_for_arm (~INTVAL (x)))
9065 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9066 else if (const_ok_for_arm (-INTVAL (x)))
9068 if (outer_code == COMPARE || outer_code == PLUS
9069 || outer_code == MINUS)
9070 *total = 0;
9071 else
9072 *total = COSTS_N_INSNS (1);
9074 else
9075 *total = COSTS_N_INSNS (2);
9076 return true;
9078 case CONST:
9079 case LABEL_REF:
9080 case SYMBOL_REF:
9081 *total = COSTS_N_INSNS (2);
9082 return true;
9084 case CONST_DOUBLE:
9085 *total = COSTS_N_INSNS (4);
9086 return true;
9088 case CONST_VECTOR:
9089 if (TARGET_NEON
9090 && TARGET_HARD_FLOAT
9091 && outer_code == SET
9092 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9093 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9094 *total = COSTS_N_INSNS (1);
9095 else
9096 *total = COSTS_N_INSNS (4);
9097 return true;
9099 case HIGH:
9100 case LO_SUM:
9101 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9102 cost of these slightly. */
9103 *total = COSTS_N_INSNS (1) + 1;
9104 return true;
9106 case SET:
9107 return false;
9109 default:
9110 if (mode != VOIDmode)
9111 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9112 else
9113 *total = COSTS_N_INSNS (4); /* Who knows? */
9114 return false;
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand.
9121 Return NULL if OP is not a shifter operand. */
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9125 enum rtx_code code = GET_CODE (op);
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9140 return NULL;
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 gcc_assert (GET_CODE (x) == UNSPEC);
9149 switch (XINT (x, 1))
9151 case UNSPEC_UNALIGNED_LOAD:
9152 /* We can only do unaligned loads into the integer unit, and we can't
9153 use LDM or LDRD. */
9154 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9155 if (speed_p)
9156 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9157 + extra_cost->ldst.load_unaligned);
9159 #ifdef NOT_YET
9160 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9161 ADDR_SPACE_GENERIC, speed_p);
9162 #endif
9163 return true;
9165 case UNSPEC_UNALIGNED_STORE:
9166 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9167 if (speed_p)
9168 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9169 + extra_cost->ldst.store_unaligned);
9171 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9172 #ifdef NOT_YET
9173 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9174 ADDR_SPACE_GENERIC, speed_p);
9175 #endif
9176 return true;
9178 case UNSPEC_VRINTZ:
9179 case UNSPEC_VRINTP:
9180 case UNSPEC_VRINTM:
9181 case UNSPEC_VRINTR:
9182 case UNSPEC_VRINTX:
9183 case UNSPEC_VRINTA:
9184 *cost = COSTS_N_INSNS (1);
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9193 return false;
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
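/* Illustrative example (editorial): LIBCALL_COST (2) expands to
COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4) at -Os,
i.e. two argument insns plus the call and result handling described above. */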
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9207 if (shift_reg) \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9213 else if (speed_p) \
9214 *cost += extra_cost->alu.arith_shift; \
9216 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9217 + rtx_cost (XEXP (x, 1 - IDX), \
9218 OP, 1, speed_p)); \
9219 return true; \
9222 while (0);
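/* Editorial note: the macro above is used by the narrow-mode (HImode/QImode)
PLUS and MINUS cost cases later in the function; when operand IDX is a left
shift such as (ashift (reg) (const_int 2)), or the equivalent power-of-two
MULT, the shift is charged as part of the arithmetic instruction
(alu.arith_shift or alu.arith_shift_reg) rather than priced separately. */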
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained within an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9230 costs.
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9237 static bool
9238 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9239 const struct cpu_cost_table *extra_cost,
9240 int *cost, bool speed_p)
9242 enum machine_mode mode = GET_MODE (x);
9244 if (TARGET_THUMB1)
9246 if (speed_p)
9247 *cost = thumb1_rtx_costs (x, code, outer_code);
9248 else
9249 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9250 return true;
9253 switch (code)
9255 case SET:
9256 *cost = 0;
9257 /* SET RTXs don't have a mode so we get it from the destination. */
9258 mode = GET_MODE (SET_DEST (x));
9260 if (REG_P (SET_SRC (x))
9261 && REG_P (SET_DEST (x)))
9263 /* Assume that most copies can be done with a single insn,
9264 unless we don't have HW FP, in which case everything
9265 larger than word mode will require two insns. */
9266 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9267 && GET_MODE_SIZE (mode) > 4)
9268 || mode == DImode)
9269 ? 2 : 1);
9270 /* Conditional register moves can be encoded
9271 in 16 bits in Thumb mode. */
9272 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9273 *cost >>= 1;
9275 return true;
9278 if (CONST_INT_P (SET_SRC (x)))
9280 /* Handle CONST_INT here, since the value doesn't have a mode
9281 and we would otherwise be unable to work out the true cost. */
9282 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9283 outer_code = SET;
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x))
9287 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9288 *cost -= 1;
9289 x = SET_SRC (x);
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9293 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9294 *cost >>= 1;
9295 goto const_int_cost;
9298 return false;
9300 case MEM:
9301 /* A memory access costs 1 insn if the mode is small, or the address is
9302 a single register, otherwise it costs one insn per word. */
9303 if (REG_P (XEXP (x, 0)))
9304 *cost = COSTS_N_INSNS (1);
9305 else if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost = COSTS_N_INSNS (2);
9311 else
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9316 if (speed_p)
9317 #ifdef NOT_YET
9318 *cost += (extra_cost->ldst.load
9319 + arm_address_cost (XEXP (x, 0), mode,
9320 ADDR_SPACE_GENERIC, speed_p));
9321 #else
9322 *cost += extra_cost->ldst.load;
9323 #endif
9324 return true;
9326 case PARALLEL:
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
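/* Illustrative example (editorial; the tuning values are made up): with
ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2, an
8-register LDM costs ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2),
i.e. ldm_1st plus three additional insns. */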
9341 bool is_ldm = load_multiple_operation (x, SImode);
9342 bool is_stm = store_multiple_operation (x, SImode);
9344 *cost = COSTS_N_INSNS (1);
9346 if (is_ldm || is_stm)
9348 if (speed_p)
9350 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9351 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9352 ? extra_cost->ldst.ldm_regs_per_insn_1st
9353 : extra_cost->ldst.stm_regs_per_insn_1st;
9354 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9355 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9356 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9358 *cost += regs_per_insn_1st
9359 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9360 + regs_per_insn_sub - 1)
9361 / regs_per_insn_sub);
9362 return true;
9366 return false;
9368 case DIV:
9369 case UDIV:
9370 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9371 && (mode == SFmode || !TARGET_VFP_SINGLE))
9372 *cost = COSTS_N_INSNS (speed_p
9373 ? extra_cost->fp[mode != SFmode].div : 1);
9374 else if (mode == SImode && TARGET_IDIV)
9375 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9376 else
9377 *cost = LIBCALL_COST (2);
9378 return false; /* All arguments must be in registers. */
9380 case MOD:
9381 case UMOD:
9382 *cost = LIBCALL_COST (2);
9383 return false; /* All arguments must be in registers. */
9385 case ROTATE:
9386 if (mode == SImode && REG_P (XEXP (x, 1)))
9388 *cost = (COSTS_N_INSNS (2)
9389 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9390 if (speed_p)
9391 *cost += extra_cost->alu.shift_reg;
9392 return true;
9394 /* Fall through */
9395 case ROTATERT:
9396 case ASHIFT:
9397 case LSHIFTRT:
9398 case ASHIFTRT:
9399 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9401 *cost = (COSTS_N_INSNS (3)
9402 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9403 if (speed_p)
9404 *cost += 2 * extra_cost->alu.shift;
9405 return true;
9407 else if (mode == SImode)
9409 *cost = (COSTS_N_INSNS (1)
9410 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9411 /* Slightly disparage register shifts at -Os, but not by much. */
9412 if (!CONST_INT_P (XEXP (x, 1)))
9413 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9414 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9415 return true;
9417 else if (GET_MODE_CLASS (mode) == MODE_INT
9418 && GET_MODE_SIZE (mode) < 4)
9420 if (code == ASHIFT)
9422 *cost = (COSTS_N_INSNS (1)
9423 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9424 /* Slightly disparage register shifts at -Os, but not by
9425 much. */
9426 if (!CONST_INT_P (XEXP (x, 1)))
9427 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9428 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9430 else if (code == LSHIFTRT || code == ASHIFTRT)
9432 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9434 /* Can use SBFX/UBFX. */
9435 *cost = COSTS_N_INSNS (1);
9436 if (speed_p)
9437 *cost += extra_cost->alu.bfx;
9438 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9440 else
9442 *cost = COSTS_N_INSNS (2);
9443 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9444 if (speed_p)
9446 if (CONST_INT_P (XEXP (x, 1)))
9447 *cost += 2 * extra_cost->alu.shift;
9448 else
9449 *cost += (extra_cost->alu.shift
9450 + extra_cost->alu.shift_reg);
9452 else
9453 /* Slightly disparage register shifts. */
9454 *cost += !CONST_INT_P (XEXP (x, 1));
9457 else /* Rotates. */
9459 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9460 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9461 if (speed_p)
9463 if (CONST_INT_P (XEXP (x, 1)))
9464 *cost += (2 * extra_cost->alu.shift
9465 + extra_cost->alu.log_shift);
9466 else
9467 *cost += (extra_cost->alu.shift
9468 + extra_cost->alu.shift_reg
9469 + extra_cost->alu.log_shift_reg);
9472 return true;
9475 *cost = LIBCALL_COST (2);
9476 return false;
9478 case BSWAP:
9479 if (arm_arch6)
9481 if (mode == SImode)
9483 *cost = COSTS_N_INSNS (1);
9484 if (speed_p)
9485 *cost += extra_cost->alu.rev;
9487 return false;
9490 else
9492 /* No rev instruction available. Look at arm_legacy_rev
9493 and thumb_legacy_rev for the form of RTL used then. */
9494 if (TARGET_THUMB)
9496 *cost = COSTS_N_INSNS (10);
9498 if (speed_p)
9500 *cost += 6 * extra_cost->alu.shift;
9501 *cost += 3 * extra_cost->alu.logical;
9504 else
9506 *cost = COSTS_N_INSNS (5);
9508 if (speed_p)
9510 *cost += 2 * extra_cost->alu.shift;
9511 *cost += extra_cost->alu.arith_shift;
9512 *cost += 2 * extra_cost->alu.logical;
9515 return true;
9517 return false;
9519 case MINUS:
9520 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9521 && (mode == SFmode || !TARGET_VFP_SINGLE))
9523 *cost = COSTS_N_INSNS (1);
9524 if (GET_CODE (XEXP (x, 0)) == MULT
9525 || GET_CODE (XEXP (x, 1)) == MULT)
9527 rtx mul_op0, mul_op1, sub_op;
9529 if (speed_p)
9530 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9532 if (GET_CODE (XEXP (x, 0)) == MULT)
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 sub_op = XEXP (x, 1);
9538 else
9540 mul_op0 = XEXP (XEXP (x, 1), 0);
9541 mul_op1 = XEXP (XEXP (x, 1), 1);
9542 sub_op = XEXP (x, 0);
9545 /* The first operand of the multiply may be optionally
9546 negated. */
9547 if (GET_CODE (mul_op0) == NEG)
9548 mul_op0 = XEXP (mul_op0, 0);
9550 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9551 + rtx_cost (mul_op1, code, 0, speed_p)
9552 + rtx_cost (sub_op, code, 0, speed_p));
9554 return true;
9557 if (speed_p)
9558 *cost += extra_cost->fp[mode != SFmode].addsub;
9559 return false;
9562 if (mode == SImode)
9564 rtx shift_by_reg = NULL;
9565 rtx shift_op;
9566 rtx non_shift_op;
9568 *cost = COSTS_N_INSNS (1);
9570 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9571 if (shift_op == NULL)
9573 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9574 non_shift_op = XEXP (x, 0);
9576 else
9577 non_shift_op = XEXP (x, 1);
9579 if (shift_op != NULL)
9581 if (shift_by_reg != NULL)
9583 if (speed_p)
9584 *cost += extra_cost->alu.arith_shift_reg;
9585 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9587 else if (speed_p)
9588 *cost += extra_cost->alu.arith_shift;
9590 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9591 + rtx_cost (non_shift_op, code, 0, speed_p));
9592 return true;
9595 if (arm_arch_thumb2
9596 && GET_CODE (XEXP (x, 1)) == MULT)
9598 /* MLS. */
9599 if (speed_p)
9600 *cost += extra_cost->mult[0].add;
9601 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9602 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9603 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9604 return true;
9607 if (CONST_INT_P (XEXP (x, 0)))
9609 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9610 INTVAL (XEXP (x, 0)), NULL_RTX,
9611 NULL_RTX, 1, 0);
9612 *cost = COSTS_N_INSNS (insns);
9613 if (speed_p)
9614 *cost += insns * extra_cost->alu.arith;
9615 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9616 return true;
9619 return false;
9622 if (GET_MODE_CLASS (mode) == MODE_INT
9623 && GET_MODE_SIZE (mode) < 4)
9625 rtx shift_op, shift_reg;
9626 shift_reg = NULL;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost = 1 + COSTS_N_INSNS (1);
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 if (CONST_INT_P (XEXP (x, 0)))
9641 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9642 return true;
9645 return false;
9648 if (mode == DImode)
9650 *cost = COSTS_N_INSNS (2);
9652 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9654 rtx op1 = XEXP (x, 1);
9656 if (speed_p)
9657 *cost += 2 * extra_cost->alu.arith;
9659 if (GET_CODE (op1) == ZERO_EXTEND)
9660 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9661 else
9662 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9663 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9664 0, speed_p);
9665 return true;
9667 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9669 if (speed_p)
9670 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9671 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9672 0, speed_p)
9673 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9674 return true;
9676 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9677 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9679 if (speed_p)
9680 *cost += (extra_cost->alu.arith
9681 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9682 ? extra_cost->alu.arith
9683 : extra_cost->alu.arith_shift));
9684 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9685 + rtx_cost (XEXP (XEXP (x, 1), 0),
9686 GET_CODE (XEXP (x, 1)), 0, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += 2 * extra_cost->alu.arith;
9692 return false;
9695 /* Vector mode? */
9697 *cost = LIBCALL_COST (2);
9698 return false;
9700 case PLUS:
9701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9702 && (mode == SFmode || !TARGET_VFP_SINGLE))
9704 *cost = COSTS_N_INSNS (1);
9705 if (GET_CODE (XEXP (x, 0)) == MULT)
9707 rtx mul_op0, mul_op1, add_op;
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9712 mul_op0 = XEXP (XEXP (x, 0), 0);
9713 mul_op1 = XEXP (XEXP (x, 0), 1);
9714 add_op = XEXP (x, 1);
9716 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9717 + rtx_cost (mul_op1, code, 0, speed_p)
9718 + rtx_cost (add_op, code, 0, speed_p));
9720 return true;
9723 if (speed_p)
9724 *cost += extra_cost->fp[mode != SFmode].addsub;
9725 return false;
9727 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9729 *cost = LIBCALL_COST (2);
9730 return false;
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9736 narrow modes. */
9737 if (GET_MODE_CLASS (mode) == MODE_INT
9738 && GET_MODE_SIZE (mode) < 4)
9740 rtx shift_op, shift_reg;
9741 shift_reg = NULL;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9745 if (CONST_INT_P (XEXP (x, 1)))
9747 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 1)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 /* Slightly penalize a narrow operation as the result may
9754 need widening. */
9755 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9756 return true;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost = 1 + COSTS_N_INSNS (1);
9762 if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (mode == SImode)
9770 rtx shift_op, shift_reg;
9772 *cost = COSTS_N_INSNS (1);
9773 if (TARGET_INT_SIMD
9774 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9777 /* UXTA[BH] or SXTA[BH]. */
9778 if (speed_p)
9779 *cost += extra_cost->alu.extend_arith;
9780 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9781 speed_p)
9782 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9783 return true;
9786 shift_reg = NULL;
9787 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9788 if (shift_op != NULL)
9790 if (shift_reg)
9792 if (speed_p)
9793 *cost += extra_cost->alu.arith_shift_reg;
9794 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9796 else if (speed_p)
9797 *cost += extra_cost->alu.arith_shift;
9799 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9800 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9801 return true;
9803 if (GET_CODE (XEXP (x, 0)) == MULT)
9805 rtx mul_op = XEXP (x, 0);
9807 *cost = COSTS_N_INSNS (1);
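/* The test below looks for a 16x16->32 multiply feeding the accumulate:
   each multiply operand must be either a sign_extend of a 16-bit value
   (the bottom half) or an arithmetic right shift by 16 (the top half),
   which is what maps onto the SMLABB/SMLABT/SMLATB/SMLATT forms.  */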
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9832 return true;
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9840 return true;
9842 if (CONST_INT_P (XEXP (x, 1)))
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9851 return true;
9853 return false;
9856 if (mode == DImode)
9858 if (arm_arch3m
9859 && GET_CODE (XEXP (x, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9862 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9865 *cost = COSTS_N_INSNS (1);
9866 if (speed_p)
9867 *cost += extra_cost->mult[1].extend_add;
9868 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9869 ZERO_EXTEND, 0, speed_p)
9870 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9873 return true;
9876 *cost = COSTS_N_INSNS (2);
9878 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9879 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9881 if (speed_p)
9882 *cost += (extra_cost->alu.arith
9883 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9884 ? extra_cost->alu.arith
9885 : extra_cost->alu.arith_shift));
9887 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9888 speed_p)
9889 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9890 return true;
9893 if (speed_p)
9894 *cost += 2 * extra_cost->alu.arith;
9895 return false;
9898 /* Vector mode? */
9899 *cost = LIBCALL_COST (2);
9900 return false;
9901 case IOR:
9902 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9904 *cost = COSTS_N_INSNS (1);
9905 if (speed_p)
9906 *cost += extra_cost->alu.rev;
9908 return true;
9910 /* Fall through. */
9911 case AND: case XOR:
9912 if (mode == SImode)
9914 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9915 rtx op0 = XEXP (x, 0);
9916 rtx shift_op, shift_reg;
9918 *cost = COSTS_N_INSNS (1);
9920 if (subcode == NOT
9921 && (code == AND
9922 || (code == IOR && TARGET_THUMB2)))
9923 op0 = XEXP (op0, 0);
9925 shift_reg = NULL;
9926 shift_op = shifter_op_p (op0, &shift_reg);
9927 if (shift_op != NULL)
9929 if (shift_reg)
9931 if (speed_p)
9932 *cost += extra_cost->alu.log_shift_reg;
9933 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9935 else if (speed_p)
9936 *cost += extra_cost->alu.log_shift;
9938 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9939 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9940 return true;
9943 if (CONST_INT_P (XEXP (x, 1)))
9945 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9946 INTVAL (XEXP (x, 1)), NULL_RTX,
9947 NULL_RTX, 1, 0);
9949 *cost = COSTS_N_INSNS (insns);
9950 if (speed_p)
9951 *cost += insns * extra_cost->alu.logical;
9952 *cost += rtx_cost (op0, code, 0, speed_p);
9953 return true;
9956 if (speed_p)
9957 *cost += extra_cost->alu.logical;
9958 *cost += (rtx_cost (op0, code, 0, speed_p)
9959 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9960 return true;
9963 if (mode == DImode)
9965 rtx op0 = XEXP (x, 0);
9966 enum rtx_code subcode = GET_CODE (op0);
9968 *cost = COSTS_N_INSNS (2);
9970 if (subcode == NOT
9971 && (code == AND
9972 || (code == IOR && TARGET_THUMB2)))
9973 op0 = XEXP (op0, 0);
9975 if (GET_CODE (op0) == ZERO_EXTEND)
9977 if (speed_p)
9978 *cost += 2 * extra_cost->alu.logical;
9980 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9982 return true;
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9989 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9990 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9991 return true;
9994 if (speed_p)
9995 *cost += 2 * extra_cost->alu.logical;
9997 return true;
9999 /* Vector mode? */
10001 *cost = LIBCALL_COST (2);
10002 return false;
10004 case MULT:
10005 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10006 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 rtx op0 = XEXP (x, 0);
10010 *cost = COSTS_N_INSNS (1);
10012 if (GET_CODE (op0) == NEG)
10013 op0 = XEXP (op0, 0);
10015 if (speed_p)
10016 *cost += extra_cost->fp[mode != SFmode].mult;
10018 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10020 return true;
10022 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10024 *cost = LIBCALL_COST (2);
10025 return false;
10028 if (mode == SImode)
10030 *cost = COSTS_N_INSNS (1);
10031 if (TARGET_DSP_MULTIPLY
10032 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10033 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10034 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10035 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10036 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10037 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10039 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10040 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10041 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10042 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10043 && (INTVAL (XEXP (XEXP (x, 1), 1))
10044 == 16))))))
10046 /* SMUL[TB][TB]. */
10047 if (speed_p)
10048 *cost += extra_cost->mult[0].extend;
10049 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10050 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10051 return true;
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10058 if (mode == DImode)
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10066 *cost = COSTS_N_INSNS (1);
10067 if (speed_p)
10068 *cost += extra_cost->mult[1].extend;
10069 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10070 ZERO_EXTEND, 0, speed_p)
10071 + rtx_cost (XEXP (XEXP (x, 1), 0),
10072 ZERO_EXTEND, 0, speed_p));
10073 return true;
10076 *cost = LIBCALL_COST (2);
10077 return false;
10080 /* Vector mode? */
10081 *cost = LIBCALL_COST (2);
10082 return false;
10084 case NEG:
10085 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10086 && (mode == SFmode || !TARGET_VFP_SINGLE))
10088 *cost = COSTS_N_INSNS (1);
10089 if (speed_p)
10090 *cost += extra_cost->fp[mode != SFmode].neg;
10092 return false;
10094 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10096 *cost = LIBCALL_COST (1);
10097 return false;
10100 if (mode == SImode)
10102 if (GET_CODE (XEXP (x, 0)) == ABS)
10104 *cost = COSTS_N_INSNS (2);
10105 /* Assume the non-flag-changing variant. */
10106 if (speed_p)
10107 *cost += (extra_cost->alu.log_shift
10108 + extra_cost->alu.arith_shift);
10109 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10110 return true;
10113 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10114 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10116 *cost = COSTS_N_INSNS (2);
10117 /* No extra cost for MOV imm and MVN imm. */
10118 /* If the comparison op is using the flags, there's no further
10119 cost, otherwise we need to add the cost of the comparison. */
10120 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10121 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10122 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10124 *cost += (COSTS_N_INSNS (1)
10125 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10126 speed_p)
10127 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10128 speed_p));
10129 if (speed_p)
10130 *cost += extra_cost->alu.arith;
10132 return true;
10134 *cost = COSTS_N_INSNS (1);
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 return false;
10140 if (GET_MODE_CLASS (mode) == MODE_INT
10141 && GET_MODE_SIZE (mode) < 4)
10143 /* Slightly disparage, as we might need an extend operation. */
10144 *cost = 1 + COSTS_N_INSNS (1);
10145 if (speed_p)
10146 *cost += extra_cost->alu.arith;
10147 return false;
10150 if (mode == DImode)
10152 *cost = COSTS_N_INSNS (2);
10153 if (speed_p)
10154 *cost += 2 * extra_cost->alu.arith;
10155 return false;
10158 /* Vector mode? */
10159 *cost = LIBCALL_COST (1);
10160 return false;
10162 case NOT:
10163 if (mode == SImode)
10165 rtx shift_op;
10166 rtx shift_reg = NULL;
10168 *cost = COSTS_N_INSNS (1);
10169 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10171 if (shift_op)
10173 if (shift_reg != NULL)
10175 if (speed_p)
10176 *cost += extra_cost->alu.log_shift_reg;
10177 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10179 else if (speed_p)
10180 *cost += extra_cost->alu.log_shift;
10181 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10182 return true;
10185 if (speed_p)
10186 *cost += extra_cost->alu.logical;
10187 return false;
10189 if (mode == DImode)
10191 *cost = COSTS_N_INSNS (2);
10192 return false;
10195 /* Vector mode? */
10197 *cost += LIBCALL_COST (1);
10198 return false;
10200 case IF_THEN_ELSE:
10202 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10204 *cost = COSTS_N_INSNS (4);
10205 return true;
10207 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10208 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10210 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10211 /* Assume that if one arm of the if_then_else is a register,
10212 that it will be tied with the result and eliminate the
10213 conditional insn. */
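/* If neither arm is a plain register, both arms contribute: when
   optimizing for speed, if conditionally skipped insns still consume
   issue slots (non_exec_costs_exec), charge both arms plus the
   predication overhead; otherwise charge only the more expensive arm
   plus the overhead.  At -Os both arms are simply summed.  */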
10214 if (REG_P (XEXP (x, 1)))
10215 *cost += op2cost;
10216 else if (REG_P (XEXP (x, 2)))
10217 *cost += op1cost;
10218 else
10220 if (speed_p)
10222 if (extra_cost->alu.non_exec_costs_exec)
10223 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10224 else
10225 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10227 else
10228 *cost += op1cost + op2cost;
10231 return true;
10233 case COMPARE:
10234 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10235 *cost = 0;
10236 else
10238 enum machine_mode op0mode;
10239 /* We'll mostly assume that the cost of a compare is the cost of the
10240 LHS. However, there are some notable exceptions. */
10242 /* Floating point compares are never done as side-effects. */
10243 op0mode = GET_MODE (XEXP (x, 0));
10244 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10245 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10247 *cost = COSTS_N_INSNS (1);
10248 if (speed_p)
10249 *cost += extra_cost->fp[op0mode != SFmode].compare;
10251 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10253 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10254 return true;
10257 return false;
10259 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10261 *cost = LIBCALL_COST (2);
10262 return false;
10265 /* DImode compares normally take two insns. */
10266 if (op0mode == DImode)
10268 *cost = COSTS_N_INSNS (2);
10269 if (speed_p)
10270 *cost += 2 * extra_cost->alu.arith;
10271 return false;
10274 if (op0mode == SImode)
10276 rtx shift_op;
10277 rtx shift_reg;
10279 if (XEXP (x, 1) == const0_rtx
10280 && !(REG_P (XEXP (x, 0))
10281 || (GET_CODE (XEXP (x, 0)) == SUBREG
10282 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10284 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10286 /* Multiply operations that set the flags are often
10287 significantly more expensive. */
10288 if (speed_p
10289 && GET_CODE (XEXP (x, 0)) == MULT
10290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10291 *cost += extra_cost->mult[0].flag_setting;
10293 if (speed_p
10294 && GET_CODE (XEXP (x, 0)) == PLUS
10295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10296 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10297 0), 1), mode))
10298 *cost += extra_cost->mult[0].flag_setting;
10299 return true;
10302 shift_reg = NULL;
10303 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10304 if (shift_op != NULL)
10306 *cost = COSTS_N_INSNS (1);
10307 if (shift_reg != NULL)
10309 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10310 if (speed_p)
10311 *cost += extra_cost->alu.arith_shift_reg;
10313 else if (speed_p)
10314 *cost += extra_cost->alu.arith_shift;
10315 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10316 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10317 return true;
10320 *cost = COSTS_N_INSNS (1);
10321 if (speed_p)
10322 *cost += extra_cost->alu.arith;
10323 if (CONST_INT_P (XEXP (x, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10326 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10327 return true;
10329 return false;
10332 /* Vector mode? */
10334 *cost = LIBCALL_COST (2);
10335 return false;
10337 return true;
10339 case EQ:
10340 case NE:
10341 case LT:
10342 case LE:
10343 case GT:
10344 case GE:
10345 case LTU:
10346 case LEU:
10347 case GEU:
10348 case GTU:
10349 case ORDERED:
10350 case UNORDERED:
10351 case UNEQ:
10352 case UNLE:
10353 case UNLT:
10354 case UNGE:
10355 case UNGT:
10356 case LTGT:
10357 if (outer_code == SET)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10361 && XEXP (x, 1) == const0_rtx)
10363 /* Thumb also needs an IT insn. */
10364 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10365 return true;
10367 if (XEXP (x, 1) == const0_rtx)
10369 switch (code)
10371 case LT:
10372 /* LSR Rd, Rn, #31. */
10373 *cost = COSTS_N_INSNS (1);
10374 if (speed_p)
10375 *cost += extra_cost->alu.shift;
10376 break;
10378 case EQ:
10379 /* RSBS T1, Rn, #0
10380 ADC Rd, Rn, T1. */
10382 case NE:
10383 /* SUBS T1, Rn, #1
10384 SBC Rd, Rn, T1. */
10385 *cost = COSTS_N_INSNS (2);
10386 break;
10388 case LE:
10389 /* RSBS T1, Rn, Rn, LSR #31
10390 ADC Rd, Rn, T1. */
10391 *cost = COSTS_N_INSNS (2);
10392 if (speed_p)
10393 *cost += extra_cost->alu.arith_shift;
10394 break;
10396 case GT:
10397 /* RSB Rd, Rn, Rn, ASR #1
10398 LSR Rd, Rd, #31. */
10399 *cost = COSTS_N_INSNS (2);
10400 if (speed_p)
10401 *cost += (extra_cost->alu.arith_shift
10402 + extra_cost->alu.shift);
10403 break;
10405 case GE:
10406 /* ASR Rd, Rn, #31
10407 ADD Rd, Rn, #1. */
10408 *cost = COSTS_N_INSNS (2);
10409 if (speed_p)
10410 *cost += extra_cost->alu.shift;
10411 break;
10413 default:
10414 /* Remaining cases are either meaningless or would take
10415 three insns anyway. */
10416 *cost = COSTS_N_INSNS (3);
10417 break;
10419 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10420 return true;
10422 else
10424 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10425 if (CONST_INT_P (XEXP (x, 1))
10426 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10428 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10429 return true;
10432 return false;
10435 /* Not directly inside a set. If it involves the condition code
10436 register it must be the condition for a branch, cond_exec or
10437 I_T_E operation. Since the comparison is performed elsewhere
10438 this is just the control part which has no additional
10439 cost. */
10440 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10441 && XEXP (x, 1) == const0_rtx)
10443 *cost = 0;
10444 return true;
10446 return false;
10448 case ABS:
10449 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10450 && (mode == SFmode || !TARGET_VFP_SINGLE))
10452 *cost = COSTS_N_INSNS (1);
10453 if (speed_p)
10454 *cost += extra_cost->fp[mode != SFmode].neg;
10456 return false;
10458 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 *cost = LIBCALL_COST (1);
10461 return false;
10464 if (mode == SImode)
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10479 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10481 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1);
10484 if (!speed_p)
10485 return true;
10487 if (GET_MODE (XEXP (x, 0)) == SImode)
10488 *cost += extra_cost->ldst.load;
10489 else
10490 *cost += extra_cost->ldst.load_sign_extend;
10492 if (mode == DImode)
10493 *cost += extra_cost->alu.shift;
10495 return true;
10498 /* Widening from less than 32-bits requires an extend operation. */
10499 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10501 /* We have SXTB/SXTH. */
10502 *cost = COSTS_N_INSNS (1);
10503 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10504 if (speed_p)
10505 *cost += extra_cost->alu.extend;
10507 else if (GET_MODE (XEXP (x, 0)) != SImode)
10509 /* Needs two shifts. */
10510 *cost = COSTS_N_INSNS (2);
10511 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10512 if (speed_p)
10513 *cost += 2 * extra_cost->alu.shift;
10516 /* Widening beyond 32-bits requires one more insn. */
10517 if (mode == DImode)
10519 *cost += COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10524 return true;
10526 case ZERO_EXTEND:
10527 if ((arm_arch4
10528 || GET_MODE (XEXP (x, 0)) == SImode
10529 || GET_MODE (XEXP (x, 0)) == QImode)
10530 && MEM_P (XEXP (x, 0)))
10532 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10534 if (mode == DImode)
10535 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10537 return true;
10540 /* Widening from less than 32-bits requires an extend operation. */
10541 if (GET_MODE (XEXP (x, 0)) == QImode)
10543 /* UXTB can be a shorter instruction in Thumb2, but it might
10544 be slower than the AND Rd, Rn, #255 alternative. When
10545 optimizing for speed it should never be slower to use
10546 AND, and we don't really model 16-bit vs 32-bit insns
10547 here. */
10548 *cost = COSTS_N_INSNS (1);
10549 if (speed_p)
10550 *cost += extra_cost->alu.logical;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10554 /* We have UXTB/UXTH. */
10555 *cost = COSTS_N_INSNS (1);
10556 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10557 if (speed_p)
10558 *cost += extra_cost->alu.extend;
10560 else if (GET_MODE (XEXP (x, 0)) != SImode)
10562 /* Needs two shifts. It's marginally preferable to use
10563 shifts rather than two BIC instructions as the second
10564 shift may merge with a subsequent insn as a shifter
10565 op. */
10566 *cost = COSTS_N_INSNS (2);
10567 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10568 if (speed_p)
10569 *cost += 2 * extra_cost->alu.shift;
10571 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10572 *cost = COSTS_N_INSNS (1);
10574 /* Widening beyond 32-bits requires one more insn. */
10575 if (mode == DImode)
10577 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10580 return true;
10582 case CONST_INT:
10583 *cost = 0;
10584 /* CONST_INT has no mode, so we cannot tell for sure how many
10585 insns are really going to be needed. The best we can do is
10586 look at the value passed. If it fits in SImode, then assume
10587 that's the mode it will be used for. Otherwise assume it
10588 will be used in DImode. */
10589 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10590 mode = SImode;
10591 else
10592 mode = DImode;
10594 /* Avoid blowing up in arm_gen_constant (). */
10595 if (!(outer_code == PLUS
10596 || outer_code == AND
10597 || outer_code == IOR
10598 || outer_code == XOR
10599 || outer_code == MINUS))
10600 outer_code = SET;
10602 const_int_cost:
10603 if (mode == SImode)
10605 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10606 INTVAL (x), NULL, NULL,
10607 0, 0));
10608 /* Extra costs? */
10610 else
10612 *cost += COSTS_N_INSNS (arm_gen_constant
10613 (outer_code, SImode, NULL,
10614 trunc_int_for_mode (INTVAL (x), SImode),
10615 NULL, NULL, 0, 0)
10616 + arm_gen_constant (outer_code, SImode, NULL,
10617 INTVAL (x) >> 32, NULL,
10618 NULL, 0, 0));
10619 /* Extra costs? */
10622 return true;
10624 case CONST:
10625 case LABEL_REF:
10626 case SYMBOL_REF:
10627 if (speed_p)
10629 if (arm_arch_thumb2 && !flag_pic)
10630 *cost = COSTS_N_INSNS (2);
10631 else
10632 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10634 else
10635 *cost = COSTS_N_INSNS (2);
10637 if (flag_pic)
10639 *cost += COSTS_N_INSNS (1);
10640 if (speed_p)
10641 *cost += extra_cost->alu.arith;
10644 return true;
10646 case CONST_FIXED:
10647 *cost = COSTS_N_INSNS (4);
10648 /* Fixme. */
10649 return true;
10651 case CONST_DOUBLE:
10652 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10653 && (mode == SFmode || !TARGET_VFP_SINGLE))
10655 if (vfp3_const_double_rtx (x))
10657 *cost = COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->fp[mode == DFmode].fpconst;
10660 return true;
10663 if (speed_p)
10665 *cost = COSTS_N_INSNS (1);
10666 if (mode == DFmode)
10667 *cost += extra_cost->ldst.loadd;
10668 else
10669 *cost += extra_cost->ldst.loadf;
10671 else
10672 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10674 return true;
10676 *cost = COSTS_N_INSNS (4);
10677 return true;
10679 case CONST_VECTOR:
10680 /* Fixme. */
10681 if (TARGET_NEON
10682 && TARGET_HARD_FLOAT
10683 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10684 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10685 *cost = COSTS_N_INSNS (1);
10686 else
10687 *cost = COSTS_N_INSNS (4);
10688 return true;
10690 case HIGH:
10691 case LO_SUM:
10692 *cost = COSTS_N_INSNS (1);
10693 /* When optimizing for size, we prefer constant pool entries to
10694 MOVW/MOVT pairs, so bump the cost of these slightly. */
10695 if (!speed_p)
10696 *cost += 1;
10697 return true;
10699 case CLZ:
10700 *cost = COSTS_N_INSNS (1);
10701 if (speed_p)
10702 *cost += extra_cost->alu.clz;
10703 return false;
10705 case SMIN:
10706 if (XEXP (x, 1) == const0_rtx)
10708 *cost = COSTS_N_INSNS (1);
10709 if (speed_p)
10710 *cost += extra_cost->alu.log_shift;
10711 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10712 return true;
10714 /* Fall through. */
10715 case SMAX:
10716 case UMIN:
10717 case UMAX:
10718 *cost = COSTS_N_INSNS (2);
10719 return false;
10721 case TRUNCATE:
10722 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10723 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10724 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10726 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10727 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10728 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10729 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10730 == ZERO_EXTEND))))
10732 *cost = COSTS_N_INSNS (1);
10733 if (speed_p)
10734 *cost += extra_cost->mult[1].extend;
10735 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10736 speed_p)
10737 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10738 0, speed_p));
10739 return true;
10741 *cost = LIBCALL_COST (1);
10742 return false;
10744 case UNSPEC:
10745 return arm_unspec_cost (x, outer_code, speed_p, cost);
10747 case PC:
10748 /* Reading the PC is like reading any other register. Writing it
10749 is more expensive, but we take that into account elsewhere. */
10750 *cost = 0;
10751 return true;
10753 case ZERO_EXTRACT:
10754 /* TODO: Simple zero_extract of bottom bits using AND. */
10755 /* Fall through. */
10756 case SIGN_EXTRACT:
10757 if (arm_arch6
10758 && mode == SImode
10759 && CONST_INT_P (XEXP (x, 1))
10760 && CONST_INT_P (XEXP (x, 2)))
10762 *cost = COSTS_N_INSNS (1);
10763 if (speed_p)
10764 *cost += extra_cost->alu.bfx;
10765 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10766 return true;
10768 /* Without UBFX/SBFX, need to resort to shift operations. */
10769 *cost = COSTS_N_INSNS (2);
10770 if (speed_p)
10771 *cost += 2 * extra_cost->alu.shift;
10772 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10773 return true;
10775 case FLOAT_EXTEND:
10776 if (TARGET_HARD_FLOAT)
10778 *cost = COSTS_N_INSNS (1);
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].widen;
10781 if (!TARGET_FPU_ARMV8
10782 && GET_MODE (XEXP (x, 0)) == HFmode)
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].widen;
10790 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10791 return true;
10794 *cost = LIBCALL_COST (1);
10795 return false;
10797 case FLOAT_TRUNCATE:
10798 if (TARGET_HARD_FLOAT)
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->fp[mode == DFmode].narrow;
10803 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10804 return true;
10805 /* Vector modes? */
10807 *cost = LIBCALL_COST (1);
10808 return false;
10810 case FMA:
10811 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10813 rtx op0 = XEXP (x, 0);
10814 rtx op1 = XEXP (x, 1);
10815 rtx op2 = XEXP (x, 2);
10817 *cost = COSTS_N_INSNS (1);
10819 /* vfms or vfnma. */
10820 if (GET_CODE (op0) == NEG)
10821 op0 = XEXP (op0, 0);
10823 /* vfnms or vfnma. */
10824 if (GET_CODE (op2) == NEG)
10825 op2 = XEXP (op2, 0);
10827 *cost += rtx_cost (op0, FMA, 0, speed_p);
10828 *cost += rtx_cost (op1, FMA, 1, speed_p);
10829 *cost += rtx_cost (op2, FMA, 2, speed_p);
10831 if (speed_p)
10832 *cost += extra_cost->fp[mode == DFmode].fma;
10834 return true;
10837 *cost = LIBCALL_COST (3);
10838 return false;
10840 case FIX:
10841 case UNSIGNED_FIX:
10842 if (TARGET_HARD_FLOAT)
10844 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10849 /* Strip off the 'cost' of rounding towards zero. */
10850 if (GET_CODE (XEXP (x, 0)) == FIX)
10851 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10852 else
10853 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10854 /* ??? Increase the cost to deal with transferring from
10855 FP -> CORE registers? */
10856 return true;
10858 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10859 && TARGET_FPU_ARMV8)
10861 *cost = COSTS_N_INSNS (1);
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].roundint;
10864 return false;
10866 /* Vector costs? */
10868 *cost = LIBCALL_COST (1);
10869 return false;
10871 case FLOAT:
10872 case UNSIGNED_FLOAT:
10873 if (TARGET_HARD_FLOAT)
10875 /* ??? Increase the cost to deal with transferring from CORE
10876 -> FP registers? */
10877 *cost = COSTS_N_INSNS (1);
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10882 *cost = LIBCALL_COST (1);
10883 return false;
10885 case CALL:
10886 *cost = COSTS_N_INSNS (1);
10887 return true;
10889 case ASM_OPERANDS:
10891 /* Just a guess. Guess number of instructions in the asm
10892 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10893 though (see PR60663). */
10894 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10895 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10897 *cost = COSTS_N_INSNS (asm_length + num_operands);
10898 return true;
10900 default:
10901 if (mode != VOIDmode)
10902 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10903 else
10904 *cost = COSTS_N_INSNS (4); /* Who knows? */
10905 return false;
10909 #undef HANDLE_NARROW_SHIFT_ARITH
10911 /* Implement the TARGET_RTX_COSTS hook, dispatching to the size- or
10911 tuning-specific cost functions as appropriate. */
10912 static bool
10913 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10914 int *total, bool speed)
10916 bool result;
10918 if (TARGET_OLD_RTX_COSTS
10919 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10921 /* Old way. (Deprecated.) */
10922 if (!speed)
10923 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10924 (enum rtx_code) outer_code, total);
10925 else
10926 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10927 (enum rtx_code) outer_code, total,
10928 speed);
10930 else
10932 /* New way. */
10933 if (current_tune->insn_extra_cost)
10934 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10935 (enum rtx_code) outer_code,
10936 current_tune->insn_extra_cost,
10937 total, speed);
10938 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10939 && current_tune->insn_extra_cost == NULL */
10940 else
10941 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10942 (enum rtx_code) outer_code,
10943 &generic_extra_costs, total, speed);
10946 if (dump_file && (dump_flags & TDF_DETAILS))
10948 print_rtl_single (dump_file, x);
10949 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10950 *total, result ? "final" : "partial");
10952 return result;
10955 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10956 supported on any "slowmul" cores, so it can be ignored. */
10958 static bool
10959 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10960 int *total, bool speed)
10962 enum machine_mode mode = GET_MODE (x);
10964 if (TARGET_THUMB)
10966 *total = thumb1_rtx_costs (x, code, outer_code);
10967 return true;
10970 switch (code)
10972 case MULT:
10973 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10974 || mode == DImode)
10976 *total = COSTS_N_INSNS (20);
10977 return false;
10980 if (CONST_INT_P (XEXP (x, 1)))
10982 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10983 & (unsigned HOST_WIDE_INT) 0xffffffff);
10984 int cost, const_ok = const_ok_for_arm (i);
10985 int j, booth_unit_size;
10987 /* Tune as appropriate. */
10988 cost = const_ok ? 4 : 8;
10989 booth_unit_size = 2;
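/* The loop below charges one extra insn per BOOTH_UNIT_SIZE bits of the
   constant; e.g. a constant with 16 significant bits adds 8 to COST on
   top of the base cost of 4 (or 8 when the constant is not a valid
   immediate).  */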
10990 for (j = 0; i && j < 32; j += booth_unit_size)
10992 i >>= booth_unit_size;
10993 cost++;
10996 *total = COSTS_N_INSNS (cost);
10997 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10998 return true;
11001 *total = COSTS_N_INSNS (20);
11002 return false;
11004 default:
11005 return arm_rtx_costs_1 (x, outer_code, total, speed);
11010 /* RTX cost for cores with a fast multiply unit (M variants). */
11012 static bool
11013 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11014 int *total, bool speed)
11016 enum machine_mode mode = GET_MODE (x);
11018 if (TARGET_THUMB1)
11020 *total = thumb1_rtx_costs (x, code, outer_code);
11021 return true;
11024 /* ??? should thumb2 use different costs? */
11025 switch (code)
11027 case MULT:
11028 /* There is no point basing this on the tuning, since it is always the
11029 fast variant if it exists at all. */
11030 if (mode == DImode
11031 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11032 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11033 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11035 *total = COSTS_N_INSNS (2);
11036 return false;
11040 if (mode == DImode)
11042 *total = COSTS_N_INSNS (5);
11043 return false;
11046 if (CONST_INT_P (XEXP (x, 1)))
11048 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11049 & (unsigned HOST_WIDE_INT) 0xffffffff);
11050 int cost, const_ok = const_ok_for_arm (i);
11051 int j, booth_unit_size;
11053 /* Tune as appropriate. */
11054 cost = const_ok ? 4 : 8;
11055 booth_unit_size = 8;
11056 for (j = 0; i && j < 32; j += booth_unit_size)
11058 i >>= booth_unit_size;
11059 cost++;
11062 *total = COSTS_N_INSNS (cost);
11063 return false;
11066 if (mode == SImode)
11068 *total = COSTS_N_INSNS (4);
11069 return false;
11072 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11074 if (TARGET_HARD_FLOAT
11075 && (mode == SFmode
11076 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11078 *total = COSTS_N_INSNS (1);
11079 return false;
11083 /* Requires a lib call */
11084 *total = COSTS_N_INSNS (20);
11085 return false;
11087 default:
11088 return arm_rtx_costs_1 (x, outer_code, total, speed);
11093 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11094 so it can be ignored. */
11096 static bool
11097 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11098 int *total, bool speed)
11100 enum machine_mode mode = GET_MODE (x);
11102 if (TARGET_THUMB)
11104 *total = thumb1_rtx_costs (x, code, outer_code);
11105 return true;
11108 switch (code)
11110 case COMPARE:
11111 if (GET_CODE (XEXP (x, 0)) != MULT)
11112 return arm_rtx_costs_1 (x, outer_code, total, speed);
11114 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11115 will stall until the multiplication is complete. */
11116 *total = COSTS_N_INSNS (3);
11117 return false;
11119 case MULT:
11120 /* There is no point basing this on the tuning, since it is always the
11121 fast variant if it exists at all. */
11122 if (mode == DImode
11123 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11124 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11125 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11127 *total = COSTS_N_INSNS (2);
11128 return false;
11132 if (mode == DImode)
11134 *total = COSTS_N_INSNS (5);
11135 return false;
11138 if (CONST_INT_P (XEXP (x, 1)))
11140 /* If operand 1 is a constant we can more accurately
11141 calculate the cost of the multiply. The multiplier can
11142 retire 15 bits on the first cycle and a further 12 on the
11143 second. We do, of course, have to load the constant into
11144 a register first. */
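/* Illustrative value: i == 0x12345 has bits set under the 0xffff8000 mask
   but none under the 0xf8000000 mask, so it adds one cycle to the general
   overhead, giving a total cost of 2.  */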
11145 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11146 /* There's a general overhead of one cycle. */
11147 int cost = 1;
11148 unsigned HOST_WIDE_INT masked_const;
11150 if (i & 0x80000000)
11151 i = ~i;
11153 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11155 masked_const = i & 0xffff8000;
11156 if (masked_const != 0)
11158 cost++;
11159 masked_const = i & 0xf8000000;
11160 if (masked_const != 0)
11161 cost++;
11163 *total = COSTS_N_INSNS (cost);
11164 return false;
11167 if (mode == SImode)
11169 *total = COSTS_N_INSNS (3);
11170 return false;
11173 /* Requires a lib call */
11174 *total = COSTS_N_INSNS (20);
11175 return false;
11177 default:
11178 return arm_rtx_costs_1 (x, outer_code, total, speed);
11183 /* RTX costs for 9e (and later) cores. */
11185 static bool
11186 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11187 int *total, bool speed)
11189 enum machine_mode mode = GET_MODE (x);
11191 if (TARGET_THUMB1)
11193 switch (code)
11195 case MULT:
11196 *total = COSTS_N_INSNS (3);
11197 return true;
11199 default:
11200 *total = thumb1_rtx_costs (x, code, outer_code);
11201 return true;
11205 switch (code)
11207 case MULT:
11208 /* There is no point basing this on the tuning, since it is always the
11209 fast variant if it exists at all. */
11210 if (mode == DImode
11211 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11212 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11213 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11215 *total = COSTS_N_INSNS (2);
11216 return false;
11220 if (mode == DImode)
11222 *total = COSTS_N_INSNS (5);
11223 return false;
11226 if (mode == SImode)
11228 *total = COSTS_N_INSNS (2);
11229 return false;
11232 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11234 if (TARGET_HARD_FLOAT
11235 && (mode == SFmode
11236 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11238 *total = COSTS_N_INSNS (1);
11239 return false;
11243 *total = COSTS_N_INSNS (20);
11244 return false;
11246 default:
11247 return arm_rtx_costs_1 (x, outer_code, total, speed);
11250 /* All address computations that can be done are free, but rtx cost returns
11251 the same for practically all of them. So we weight the different types
11252 of address here in the order (most pref first):
11253 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11254 static inline int
11255 arm_arm_address_cost (rtx x)
11257 enum rtx_code c = GET_CODE (x);
11259 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11260 return 0;
11261 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11262 return 10;
11264 if (c == PLUS)
11266 if (CONST_INT_P (XEXP (x, 1)))
11267 return 2;
11269 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11270 return 3;
11272 return 4;
11275 return 6;
11278 static inline int
11279 arm_thumb_address_cost (rtx x)
11281 enum rtx_code c = GET_CODE (x);
11283 if (c == REG)
11284 return 1;
11285 if (c == PLUS
11286 && REG_P (XEXP (x, 0))
11287 && CONST_INT_P (XEXP (x, 1)))
11288 return 1;
11290 return 2;
11293 static int
11294 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11295 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11297 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11300 /* Adjust cost hook for XScale. */
11301 static bool
11302 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11304 /* Some true dependencies can have a higher cost depending
11305 on precisely how certain input operands are used. */
11306 if (REG_NOTE_KIND(link) == 0
11307 && recog_memoized (insn) >= 0
11308 && recog_memoized (dep) >= 0)
11310 int shift_opnum = get_attr_shift (insn);
11311 enum attr_type attr_type = get_attr_type (dep);
11313 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11314 operand for INSN. If we have a shifted input operand and the
11315 instruction we depend on is another ALU instruction, then we may
11316 have to account for an additional stall. */
11317 if (shift_opnum != 0
11318 && (attr_type == TYPE_ALU_SHIFT_IMM
11319 || attr_type == TYPE_ALUS_SHIFT_IMM
11320 || attr_type == TYPE_LOGIC_SHIFT_IMM
11321 || attr_type == TYPE_LOGICS_SHIFT_IMM
11322 || attr_type == TYPE_ALU_SHIFT_REG
11323 || attr_type == TYPE_ALUS_SHIFT_REG
11324 || attr_type == TYPE_LOGIC_SHIFT_REG
11325 || attr_type == TYPE_LOGICS_SHIFT_REG
11326 || attr_type == TYPE_MOV_SHIFT
11327 || attr_type == TYPE_MVN_SHIFT
11328 || attr_type == TYPE_MOV_SHIFT_REG
11329 || attr_type == TYPE_MVN_SHIFT_REG))
11331 rtx shifted_operand;
11332 int opno;
11334 /* Get the shifted operand. */
11335 extract_insn (insn);
11336 shifted_operand = recog_data.operand[shift_opnum];
11338 /* Iterate over all the operands in DEP. If we write an operand
11339 that overlaps with SHIFTED_OPERAND, then we have to increase the
11340 cost of this dependency. */
11341 extract_insn (dep);
11342 preprocess_constraints (dep);
11343 for (opno = 0; opno < recog_data.n_operands; opno++)
11345 /* We can ignore strict inputs. */
11346 if (recog_data.operand_type[opno] == OP_IN)
11347 continue;
11349 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11350 shifted_operand))
11352 *cost = 2;
11353 return false;
11358 return true;
11361 /* Adjust cost hook for Cortex A9. */
11362 static bool
11363 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11365 switch (REG_NOTE_KIND (link))
11367 case REG_DEP_ANTI:
11368 *cost = 0;
11369 return false;
11371 case REG_DEP_TRUE:
11372 case REG_DEP_OUTPUT:
11373 if (recog_memoized (insn) >= 0
11374 && recog_memoized (dep) >= 0)
11376 if (GET_CODE (PATTERN (insn)) == SET)
11378 if (GET_MODE_CLASS
11379 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11380 || GET_MODE_CLASS
11381 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11383 enum attr_type attr_type_insn = get_attr_type (insn);
11384 enum attr_type attr_type_dep = get_attr_type (dep);
11386 /* By default all dependencies of the form
11387 s0 = s0 <op> s1
11388 s0 = s0 <op> s2
11389 have an extra latency of 1 cycle because
11390 of the input and output dependency in this
11391 case. However, this gets modeled as a true
11392 dependency and hence all these checks. */
11393 if (REG_P (SET_DEST (PATTERN (insn)))
11394 && REG_P (SET_DEST (PATTERN (dep)))
11395 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11396 SET_DEST (PATTERN (dep))))
11398 /* FMACS is a special case where the dependent
11399 instruction can be issued 3 cycles before
11400 the normal latency in case of an output
11401 dependency. */
11402 if ((attr_type_insn == TYPE_FMACS
11403 || attr_type_insn == TYPE_FMACD)
11404 && (attr_type_dep == TYPE_FMACS
11405 || attr_type_dep == TYPE_FMACD))
11407 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11408 *cost = insn_default_latency (dep) - 3;
11409 else
11410 *cost = insn_default_latency (dep);
11411 return false;
11413 else
11415 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11416 *cost = insn_default_latency (dep) + 1;
11417 else
11418 *cost = insn_default_latency (dep);
11420 return false;
11425 break;
11427 default:
11428 gcc_unreachable ();
11431 return true;
11434 /* Adjust cost hook for FA726TE. */
11435 static bool
11436 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11438 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11439 followed by a predicated one) has a penalty of 3. */
11440 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11441 && recog_memoized (insn) >= 0
11442 && recog_memoized (dep) >= 0
11443 && get_attr_conds (dep) == CONDS_SET)
11445 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11446 if (get_attr_conds (insn) == CONDS_USE
11447 && get_attr_type (insn) != TYPE_BRANCH)
11449 *cost = 3;
11450 return false;
11453 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11454 || get_attr_conds (insn) == CONDS_USE)
11456 *cost = 0;
11457 return false;
11461 return true;
11464 /* Implement TARGET_REGISTER_MOVE_COST.
11466 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11467 it is typically more expensive than a single memory access. We set
11468 the cost to less than two memory accesses so that floating
11469 point to integer conversion does not go through memory. */
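/* Concretely: a VFP<->GENERAL_REGS move is costed at 15 below, while
   arm_memory_move_cost charges 10 per access on 32-bit targets, so a
   direct register move (15) stays cheaper than a store/load round trip
   (2 * 10 = 20).  */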
11471 int
11472 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11473 reg_class_t from, reg_class_t to)
11475 if (TARGET_32BIT)
11477 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11478 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11479 return 15;
11480 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11481 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11482 return 4;
11483 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11484 return 20;
11485 else
11486 return 2;
11488 else
11490 if (from == HI_REGS || to == HI_REGS)
11491 return 4;
11492 else
11493 return 2;
11497 /* Implement TARGET_MEMORY_MOVE_COST. */
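/* For example: on 32-bit targets every access is costed at a flat 10;
   for Thumb-1, an SImode access costs 2 * 4 = 8 from LO_REGS and 16 from
   any other class, while sub-word accesses cost a flat 8.  */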
11499 int
11500 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11501 bool in ATTRIBUTE_UNUSED)
11503 if (TARGET_32BIT)
11504 return 10;
11505 else
11507 if (GET_MODE_SIZE (mode) < 4)
11508 return 8;
11509 else
11510 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11514 /* Vectorizer cost model implementation. */
11516 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11517 static int
11518 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11519 tree vectype,
11520 int misalign ATTRIBUTE_UNUSED)
11522 unsigned elements;
11524 switch (type_of_cost)
11526 case scalar_stmt:
11527 return current_tune->vec_costs->scalar_stmt_cost;
11529 case scalar_load:
11530 return current_tune->vec_costs->scalar_load_cost;
11532 case scalar_store:
11533 return current_tune->vec_costs->scalar_store_cost;
11535 case vector_stmt:
11536 return current_tune->vec_costs->vec_stmt_cost;
11538 case vector_load:
11539 return current_tune->vec_costs->vec_align_load_cost;
11541 case vector_store:
11542 return current_tune->vec_costs->vec_store_cost;
11544 case vec_to_scalar:
11545 return current_tune->vec_costs->vec_to_scalar_cost;
11547 case scalar_to_vec:
11548 return current_tune->vec_costs->scalar_to_vec_cost;
11550 case unaligned_load:
11551 return current_tune->vec_costs->vec_unalign_load_cost;
11553 case unaligned_store:
11554 return current_tune->vec_costs->vec_unalign_store_cost;
11556 case cond_branch_taken:
11557 return current_tune->vec_costs->cond_taken_branch_cost;
11559 case cond_branch_not_taken:
11560 return current_tune->vec_costs->cond_not_taken_branch_cost;
11562 case vec_perm:
11563 case vec_promote_demote:
11564 return current_tune->vec_costs->vec_stmt_cost;
11566 case vec_construct:
11567 elements = TYPE_VECTOR_SUBPARTS (vectype);
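/* E.g. constructing a 4-element vector is costed at 4 / 2 + 1 = 3.  */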
11568 return elements / 2 + 1;
11570 default:
11571 gcc_unreachable ();
11575 /* Implement targetm.vectorize.add_stmt_cost. */
11577 static unsigned
11578 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11579 struct _stmt_vec_info *stmt_info, int misalign,
11580 enum vect_cost_model_location where)
11582 unsigned *cost = (unsigned *) data;
11583 unsigned retval = 0;
11585 if (flag_vect_cost_model)
11587 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11588 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11590 /* Statements in an inner loop relative to the loop being
11591 vectorized are weighted more heavily. The value here is
11592 arbitrary and could potentially be improved with analysis. */
11593 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11594 count *= 50; /* FIXME. */
11596 retval = (unsigned) (count * stmt_cost);
11597 cost[where] += retval;
11600 return retval;
11603 /* Return true if and only if this insn can dual-issue only as older. */
11604 static bool
11605 cortexa7_older_only (rtx insn)
11607 if (recog_memoized (insn) < 0)
11608 return false;
11610 switch (get_attr_type (insn))
11612 case TYPE_ALU_REG:
11613 case TYPE_ALUS_REG:
11614 case TYPE_LOGIC_REG:
11615 case TYPE_LOGICS_REG:
11616 case TYPE_ADC_REG:
11617 case TYPE_ADCS_REG:
11618 case TYPE_ADR:
11619 case TYPE_BFM:
11620 case TYPE_REV:
11621 case TYPE_MVN_REG:
11622 case TYPE_SHIFT_IMM:
11623 case TYPE_SHIFT_REG:
11624 case TYPE_LOAD_BYTE:
11625 case TYPE_LOAD1:
11626 case TYPE_STORE1:
11627 case TYPE_FFARITHS:
11628 case TYPE_FADDS:
11629 case TYPE_FFARITHD:
11630 case TYPE_FADDD:
11631 case TYPE_FMOV:
11632 case TYPE_F_CVT:
11633 case TYPE_FCMPS:
11634 case TYPE_FCMPD:
11635 case TYPE_FCONSTS:
11636 case TYPE_FCONSTD:
11637 case TYPE_FMULS:
11638 case TYPE_FMACS:
11639 case TYPE_FMULD:
11640 case TYPE_FMACD:
11641 case TYPE_FDIVS:
11642 case TYPE_FDIVD:
11643 case TYPE_F_MRC:
11644 case TYPE_F_MRRC:
11645 case TYPE_F_FLAG:
11646 case TYPE_F_LOADS:
11647 case TYPE_F_STORES:
11648 return true;
11649 default:
11650 return false;
11654 /* Return true if and only if this insn can dual-issue as younger. */
11655 static bool
11656 cortexa7_younger (FILE *file, int verbose, rtx insn)
11658 if (recog_memoized (insn) < 0)
11660 if (verbose > 5)
11661 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11662 return false;
11665 switch (get_attr_type (insn))
11667 case TYPE_ALU_IMM:
11668 case TYPE_ALUS_IMM:
11669 case TYPE_LOGIC_IMM:
11670 case TYPE_LOGICS_IMM:
11671 case TYPE_EXTEND:
11672 case TYPE_MVN_IMM:
11673 case TYPE_MOV_IMM:
11674 case TYPE_MOV_REG:
11675 case TYPE_MOV_SHIFT:
11676 case TYPE_MOV_SHIFT_REG:
11677 case TYPE_BRANCH:
11678 case TYPE_CALL:
11679 return true;
11680 default:
11681 return false;
11686 /* Look for an instruction that can dual issue only as an older
11687 instruction, and move it in front of any instructions that can
11688 dual-issue as younger, while preserving the relative order of all
11689 other instructions in the ready list. This is a heuristic to help
11690 dual-issue in later cycles, by postponing issue of more flexible
11691 instructions. This heuristic may affect dual issue opportunities
11692 in the current cycle. */
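/* Illustrative example: if the ready list would issue Y1, Y2, O1 in that
   order, where Y1/Y2 can dual-issue as younger and O1 only as older, the
   code below moves O1 in front of Y1, giving the issue order O1, Y1, Y2.  */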
11693 static void
11694 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11695 int clock)
11697 int i;
11698 int first_older_only = -1, first_younger = -1;
11700 if (verbose > 5)
11701 fprintf (file,
11702 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11703 clock,
11704 *n_readyp);
11706 /* Traverse the ready list from the head (the instruction to issue
11707 first), looking for the first instruction that can issue as
11708 younger and the first instruction that can dual-issue only as
11709 older. */
11710 for (i = *n_readyp - 1; i >= 0; i--)
11712 rtx insn = ready[i];
11713 if (cortexa7_older_only (insn))
11715 first_older_only = i;
11716 if (verbose > 5)
11717 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11718 break;
11720 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11721 first_younger = i;
11724 /* Nothing to reorder because either no younger insn found or insn
11725 that can dual-issue only as older appears before any insn that
11726 can dual-issue as younger. */
11727 if (first_younger == -1)
11729 if (verbose > 5)
11730 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11731 return;
11734 /* Nothing to reorder because no older-only insn in the ready list. */
11735 if (first_older_only == -1)
11737 if (verbose > 5)
11738 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11739 return;
11742 /* Move first_older_only insn before first_younger. */
11743 if (verbose > 5)
11744 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11745 INSN_UID(ready [first_older_only]),
11746 INSN_UID(ready [first_younger]));
11747 rtx first_older_only_insn = ready [first_older_only];
11748 for (i = first_older_only; i < first_younger; i++)
11750 ready[i] = ready[i+1];
11753 ready[i] = first_older_only_insn;
11754 return;
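/* Illustrative sketch only (not part of GCC): the move performed above is a
   simple rotation of the ready list.  Assuming a plain array of instruction
   ids with first_older_only < first_younger (as guaranteed by the walk
   above), the older-only entry is saved, the entries in between are shifted
   down one slot, and the saved entry is written at first_younger.  */
static void
example_rotate_ready (int *ready, int first_older_only, int first_younger)
{
  int saved = ready[first_older_only];
  int i;

  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = saved;
}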
11757 /* Implement TARGET_SCHED_REORDER. */
11758 static int
11759 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11760 int clock)
11762 switch (arm_tune)
11764 case cortexa7:
11765 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11766 break;
11767 default:
11768 /* Do nothing for other cores. */
11769 break;
11772 return arm_issue_rate ();
11775 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11776 It corrects the value of COST based on the relationship between
11777 INSN and DEP through the dependence LINK. It returns the new
11778 value. There is a per-core adjust_cost hook to adjust scheduler costs
11779 and the per-core hook can choose to completely override the generic
11780 adjust_cost function. Only put bits of code into arm_adjust_cost that
11781 are common across all cores. */
11782 static int
11783 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11785 rtx i_pat, d_pat;
11787 /* When generating Thumb-1 code, we want to place flag-setting operations
11788 close to a conditional branch which depends on them, so that we can
11789 omit the comparison. */
11790 if (TARGET_THUMB1
11791 && REG_NOTE_KIND (link) == 0
11792 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11793 && recog_memoized (dep) >= 0
11794 && get_attr_conds (dep) == CONDS_SET)
11795 return 0;
11797 if (current_tune->sched_adjust_cost != NULL)
11799 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11800 return cost;
11803 /* XXX Is this strictly true? */
11804 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11805 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11806 return 0;
11808 /* Call insns don't incur a stall, even if they follow a load. */
11809 if (REG_NOTE_KIND (link) == 0
11810 && CALL_P (insn))
11811 return 1;
11813 if ((i_pat = single_set (insn)) != NULL
11814 && MEM_P (SET_SRC (i_pat))
11815 && (d_pat = single_set (dep)) != NULL
11816 && MEM_P (SET_DEST (d_pat)))
11818 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11819 /* This is a load after a store; there is no conflict if the load reads
11820 from a cached area. Assume that loads from the stack, and from the
11821 constant pool are cached, and that others will miss. This is a
11822 hack. */
11824 if ((GET_CODE (src_mem) == SYMBOL_REF
11825 && CONSTANT_POOL_ADDRESS_P (src_mem))
11826 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11827 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11828 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11829 return 1;
11832 return cost;
11836 arm_max_conditional_execute (void)
11838 return max_insns_skipped;
11841 static int
11842 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11844 if (TARGET_32BIT)
11845 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11846 else
11847 return (optimize > 0) ? 2 : 0;
11850 static int
11851 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11853 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11856 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11857 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11858 sequences of non-executed instructions in IT blocks probably take the same
11859 amount of time as executed instructions (and the IT instruction itself takes
11860 space in icache). This function was experimentally determined to give good
11861 results on a popular embedded benchmark. */
11863 static int
11864 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11866 return (TARGET_32BIT && speed_p) ? 1
11867 : arm_default_branch_cost (speed_p, predictable_p);
11870 static bool fp_consts_inited = false;
11872 static REAL_VALUE_TYPE value_fp0;
11874 static void
11875 init_fp_table (void)
11877 REAL_VALUE_TYPE r;
11879 r = REAL_VALUE_ATOF ("0", DFmode);
11880 value_fp0 = r;
11881 fp_consts_inited = true;
11884 /* Return TRUE if rtx X is a valid immediate FP constant. */
11886 arm_const_double_rtx (rtx x)
11888 REAL_VALUE_TYPE r;
11890 if (!fp_consts_inited)
11891 init_fp_table ();
11893 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11894 if (REAL_VALUE_MINUS_ZERO (r))
11895 return 0;
11897 if (REAL_VALUES_EQUAL (r, value_fp0))
11898 return 1;
11900 return 0;
11903 /* VFPv3 has a fairly wide range of representable immediates, formed from
11904 "quarter-precision" floating-point values. These can be evaluated using this
11905 formula (with ^ for exponentiation):
11907 -1^s * n * 2^-r
11909 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11910 16 <= n <= 31 and 0 <= r <= 7.
11912 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11914 - A (most-significant) is the sign bit.
11915 - BCD are the exponent (encoded as r XOR 3).
11916 - EFGH are the mantissa (encoded as n - 16).
11919 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11920 fconst[sd] instruction, or -1 if X isn't suitable. */
11921 static int
11922 vfp3_const_double_index (rtx x)
11924 REAL_VALUE_TYPE r, m;
11925 int sign, exponent;
11926 unsigned HOST_WIDE_INT mantissa, mant_hi;
11927 unsigned HOST_WIDE_INT mask;
11928 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11929 bool fail;
11931 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11932 return -1;
11934 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11936 /* We can't represent these things, so detect them first. */
11937 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11938 return -1;
11940 /* Extract sign, exponent and mantissa. */
11941 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11942 r = real_value_abs (&r);
11943 exponent = REAL_EXP (&r);
11944 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11945 highest (sign) bit, with a fixed binary point at bit point_pos.
11946 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11947 bits for the mantissa, this may fail (low bits would be lost). */
11948 real_ldexp (&m, &r, point_pos - exponent);
11949 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11950 mantissa = w.elt (0);
11951 mant_hi = w.elt (1);
11953 /* If there are bits set in the low part of the mantissa, we can't
11954 represent this value. */
11955 if (mantissa != 0)
11956 return -1;
11958 /* Now make it so that mantissa contains the most-significant bits, and move
11959 the point_pos to indicate that the least-significant bits have been
11960 discarded. */
11961 point_pos -= HOST_BITS_PER_WIDE_INT;
11962 mantissa = mant_hi;
11964 /* We can permit four significant bits of mantissa only, plus a high bit
11965 which is always 1. */
11966 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11967 if ((mantissa & mask) != 0)
11968 return -1;
11970 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11971 mantissa >>= point_pos - 5;
11973 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11974 floating-point immediate zero with Neon using an integer-zero load, but
11975 that case is handled elsewhere.) */
11976 if (mantissa == 0)
11977 return -1;
11979 gcc_assert (mantissa >= 16 && mantissa <= 31);
11981 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11982 normalized significands are in the range [1, 2). (Our mantissa is shifted
11983 left 4 places at this point relative to normalized IEEE754 values). GCC
11984 internally uses [0.5, 1) (see real.c), so the exponent returned from
11985 REAL_EXP must be altered. */
11986 exponent = 5 - exponent;
11988 if (exponent < 0 || exponent > 7)
11989 return -1;
11991 /* Sign, mantissa and exponent are now in the correct form to plug into the
11992 formula described in the comment above. */
11993 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
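/* Illustrative sketch only (not part of GCC): the encoding described in the
   "quarter-precision" comment above, on plain integers.  Given sign S and
   integers N and R with 16 <= N <= 31 and 0 <= R <= 7, the represented value
   is -1^S * N * 2^-R and the 8-bit index is assembled as in the return
   statement above.  For example 1.0 = 16 * 2^-4, so S = 0, N = 16, R = 4 and
   the index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */
static int
example_vfp3_index (int s, int n, int r)
{
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}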
11996 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11998 vfp3_const_double_rtx (rtx x)
12000 if (!TARGET_VFP3)
12001 return 0;
12003 return vfp3_const_double_index (x) != -1;
12006 /* Recognize immediates which can be used in various Neon instructions. Legal
12007 immediates are described by the following table (for VMVN variants, the
12008 bitwise inverse of the constant shown is recognized. In either case, VMOV
12009 is output and the correct instruction to use for a given constant is chosen
12010 by the assembler). The constant shown is replicated across all elements of
12011 the destination vector.
12013 insn elems variant constant (binary)
12014 ---- ----- ------- -----------------
12015 vmov i32 0 00000000 00000000 00000000 abcdefgh
12016 vmov i32 1 00000000 00000000 abcdefgh 00000000
12017 vmov i32 2 00000000 abcdefgh 00000000 00000000
12018 vmov i32 3 abcdefgh 00000000 00000000 00000000
12019 vmov i16 4 00000000 abcdefgh
12020 vmov i16 5 abcdefgh 00000000
12021 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12022 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12023 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12024 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12025 vmvn i16 10 00000000 abcdefgh
12026 vmvn i16 11 abcdefgh 00000000
12027 vmov i32 12 00000000 00000000 abcdefgh 11111111
12028 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12029 vmov i32 14 00000000 abcdefgh 11111111 11111111
12030 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12031 vmov i8 16 abcdefgh
12032 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12033 eeeeeeee ffffffff gggggggg hhhhhhhh
12034 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12035 vmov f32 19 00000000 00000000 00000000 00000000
12037 For case 18, B = !b. Representable values are exactly those accepted by
12038 vfp3_const_double_index, but are output as floating-point numbers rather
12039 than indices.
12041 For case 19, we will change it to vmov.i32 when assembling.
12043 Variants 0-5 (inclusive) may also be used as immediates for the second
12044 operand of VORR/VBIC instructions.
12046 The INVERSE argument causes the bitwise inverse of the given operand to be
12047 recognized instead (used for recognizing legal immediates for the VAND/VORN
12048 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12049 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12050 output, rather than the real insns vbic/vorr).
12052 INVERSE makes no difference to the recognition of float vectors.
12054 The return value is the variant of immediate as shown in the above table, or
12055 -1 if the given value doesn't match any of the listed patterns.
12057 static int
12058 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12059 rtx *modconst, int *elementwidth)
12061 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12062 matches = 1; \
12063 for (i = 0; i < idx; i += (STRIDE)) \
12064 if (!(TEST)) \
12065 matches = 0; \
12066 if (matches) \
12068 immtype = (CLASS); \
12069 elsize = (ELSIZE); \
12070 break; \
12073 unsigned int i, elsize = 0, idx = 0, n_elts;
12074 unsigned int innersize;
12075 unsigned char bytes[16];
12076 int immtype = -1, matches;
12077 unsigned int invmask = inverse ? 0xff : 0;
12078 bool vector = GET_CODE (op) == CONST_VECTOR;
12080 if (vector)
12082 n_elts = CONST_VECTOR_NUNITS (op);
12083 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12085 else
12087 n_elts = 1;
12088 if (mode == VOIDmode)
12089 mode = DImode;
12090 innersize = GET_MODE_SIZE (mode);
12093 /* Vectors of float constants. */
12094 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12096 rtx el0 = CONST_VECTOR_ELT (op, 0);
12097 REAL_VALUE_TYPE r0;
12099 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12100 return -1;
12102 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12104 for (i = 1; i < n_elts; i++)
12106 rtx elt = CONST_VECTOR_ELT (op, i);
12107 REAL_VALUE_TYPE re;
12109 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12111 if (!REAL_VALUES_EQUAL (r0, re))
12112 return -1;
12115 if (modconst)
12116 *modconst = CONST_VECTOR_ELT (op, 0);
12118 if (elementwidth)
12119 *elementwidth = 0;
12121 if (el0 == CONST0_RTX (GET_MODE (el0)))
12122 return 19;
12123 else
12124 return 18;
12127 /* Splat vector constant out into a byte vector. */
12128 for (i = 0; i < n_elts; i++)
12130 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12131 unsigned HOST_WIDE_INT elpart;
12132 unsigned int part, parts;
12134 if (CONST_INT_P (el))
12136 elpart = INTVAL (el);
12137 parts = 1;
12139 else if (CONST_DOUBLE_P (el))
12141 elpart = CONST_DOUBLE_LOW (el);
12142 parts = 2;
12144 else
12145 gcc_unreachable ();
12147 for (part = 0; part < parts; part++)
12149 unsigned int byte;
12150 for (byte = 0; byte < innersize; byte++)
12152 bytes[idx++] = (elpart & 0xff) ^ invmask;
12153 elpart >>= BITS_PER_UNIT;
12155 if (CONST_DOUBLE_P (el))
12156 elpart = CONST_DOUBLE_HIGH (el);
12160 /* Sanity check. */
12161 gcc_assert (idx == GET_MODE_SIZE (mode));
12165 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12166 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12168 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12169 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12171 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12172 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12174 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12175 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12177 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12179 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12181 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12182 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12184 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12185 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12187 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12188 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12190 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12191 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12193 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12195 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12197 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12198 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12200 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12203 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12206 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12207 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12209 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12211 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12212 && bytes[i] == bytes[(i + 8) % idx]);
12214 while (0);
12216 if (immtype == -1)
12217 return -1;
12219 if (elementwidth)
12220 *elementwidth = elsize;
12222 if (modconst)
12224 unsigned HOST_WIDE_INT imm = 0;
12226 /* Un-invert bytes of recognized vector, if necessary. */
12227 if (invmask != 0)
12228 for (i = 0; i < idx; i++)
12229 bytes[i] ^= invmask;
12231 if (immtype == 17)
12233 /* FIXME: Broken on 32-bit H_W_I hosts. */
12234 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12236 for (i = 0; i < 8; i++)
12237 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12238 << (i * BITS_PER_UNIT);
12240 *modconst = GEN_INT (imm);
12242 else
12244 unsigned HOST_WIDE_INT imm = 0;
12246 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12247 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12249 *modconst = GEN_INT (imm);
12253 return immtype;
12254 #undef CHECK
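/* Illustrative sketch only (not part of GCC): one row of the CHECK-based
   matching above, written out by hand.  Variant 0 ("00000000 00000000
   00000000 abcdefgh" replicated across i32 elements) requires every fourth
   byte of the splatted image to equal byte 0 and the remaining bytes to be
   zero.  NBYTES is assumed to be a multiple of 4.  */
static int
example_matches_variant_0 (const unsigned char *bytes, unsigned int nbytes)
{
  unsigned int i;

  for (i = 0; i < nbytes; i += 4)
    if (bytes[i] != bytes[0] || bytes[i + 1] != 0
	|| bytes[i + 2] != 0 || bytes[i + 3] != 0)
      return 0;
  return 1;
}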
12257 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12258 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12259 float elements), and a modified constant (whatever should be output for a
12260 VMOV) in *MODCONST. */
12263 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12264 rtx *modconst, int *elementwidth)
12266 rtx tmpconst;
12267 int tmpwidth;
12268 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12270 if (retval == -1)
12271 return 0;
12273 if (modconst)
12274 *modconst = tmpconst;
12276 if (elementwidth)
12277 *elementwidth = tmpwidth;
12279 return 1;
12282 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12283 the immediate is valid, write a constant suitable for using as an operand
12284 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12285 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12288 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12289 rtx *modconst, int *elementwidth)
12291 rtx tmpconst;
12292 int tmpwidth;
12293 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12295 if (retval < 0 || retval > 5)
12296 return 0;
12298 if (modconst)
12299 *modconst = tmpconst;
12301 if (elementwidth)
12302 *elementwidth = tmpwidth;
12304 return 1;
12307 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12308 the immediate is valid, write a constant suitable for using as an operand
12309 to VSHR/VSHL to *MODCONST and the corresponding element width to
12310 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right shift,
12311 because they have different limitations. */
12314 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12315 rtx *modconst, int *elementwidth,
12316 bool isleftshift)
12318 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12319 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12320 unsigned HOST_WIDE_INT last_elt = 0;
12321 unsigned HOST_WIDE_INT maxshift;
12323 /* Split vector constant out into a byte vector. */
12324 for (i = 0; i < n_elts; i++)
12326 rtx el = CONST_VECTOR_ELT (op, i);
12327 unsigned HOST_WIDE_INT elpart;
12329 if (CONST_INT_P (el))
12330 elpart = INTVAL (el);
12331 else if (CONST_DOUBLE_P (el))
12332 return 0;
12333 else
12334 gcc_unreachable ();
12336 if (i != 0 && elpart != last_elt)
12337 return 0;
12339 last_elt = elpart;
12342 /* Shift less than element size. */
12343 maxshift = innersize * 8;
12345 if (isleftshift)
12347 /* Left shift immediate value can be from 0 to <size>-1. */
12348 if (last_elt >= maxshift)
12349 return 0;
12351 else
12353 /* Right shift immediate value can be from 1 to <size>. */
12354 if (last_elt == 0 || last_elt > maxshift)
12355 return 0;
12358 if (elementwidth)
12359 *elementwidth = innersize * 8;
12361 if (modconst)
12362 *modconst = CONST_VECTOR_ELT (op, 0);
12364 return 1;
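/* Illustrative sketch only (not part of GCC): the immediate ranges enforced
   above.  For elements of ELSIZE_BITS bits, a left-shift immediate must lie
   in [0, ELSIZE_BITS - 1] and a right-shift immediate in [1, ELSIZE_BITS];
   e.g. with 16-bit elements VSHL accepts 0..15 and VSHR accepts 1..16.  */
static int
example_shift_imm_ok (unsigned int elsize_bits, unsigned long imm,
		      int is_left_shift)
{
  if (is_left_shift)
    return imm < elsize_bits;
  return imm >= 1 && imm <= elsize_bits;
}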
12367 /* Return a string suitable for output of Neon immediate logic operation
12368 MNEM. */
12370 char *
12371 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12372 int inverse, int quad)
12374 int width, is_valid;
12375 static char templ[40];
12377 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12379 gcc_assert (is_valid != 0);
12381 if (quad)
12382 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12383 else
12384 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12386 return templ;
12389 /* Return a string suitable for output of Neon immediate shift operation
12390 (VSHR or VSHL) MNEM. */
12392 char *
12393 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12394 enum machine_mode mode, int quad,
12395 bool isleftshift)
12397 int width, is_valid;
12398 static char templ[40];
12400 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12401 gcc_assert (is_valid != 0);
12403 if (quad)
12404 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12405 else
12406 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12408 return templ;
12411 /* Output a sequence of pairwise operations to implement a reduction.
12412 NOTE: We do "too much work" here, because pairwise operations work on two
12413 registers-worth of operands in one go. Unfortunately, I don't think we can
12414 exploit those extra calculations to do the full operation in fewer steps.
12415 Although all vector elements of the result but the first are ignored, we
12416 actually calculate the same result in each of the elements. An alternative
12417 such as initially loading a vector with zero to use as each of the second
12418 operands would use up an additional register and take an extra instruction,
12419 for no particular gain. */
12421 void
12422 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12423 rtx (*reduc) (rtx, rtx, rtx))
12425 enum machine_mode inner = GET_MODE_INNER (mode);
12426 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12427 rtx tmpsum = op1;
12429 for (i = parts / 2; i >= 1; i /= 2)
12431 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12432 emit_insn (reduc (dest, tmpsum, tmpsum));
12433 tmpsum = dest;
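/* Illustrative sketch only (not part of GCC): the same halving strategy on a
   plain array.  Each step adds neighbouring pairs into the low half of the
   array; after log2(N) steps element 0 holds the full reduction, just as the
   first lane of OP0 does above.  N is assumed to be a power of two.  */
static int
example_pairwise_sum (int *v, int n)
{
  int step, i;

  for (step = n / 2; step >= 1; step /= 2)
    for (i = 0; i < step; i++)
      v[i] = v[2 * i] + v[2 * i + 1];
  return v[0];
}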
12437 /* If VALS is a vector constant that can be loaded into a register
12438 using VDUP, generate instructions to do so and return an RTX to
12439 assign to the register. Otherwise return NULL_RTX. */
12441 static rtx
12442 neon_vdup_constant (rtx vals)
12444 enum machine_mode mode = GET_MODE (vals);
12445 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12446 int n_elts = GET_MODE_NUNITS (mode);
12447 bool all_same = true;
12448 rtx x;
12449 int i;
12451 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12452 return NULL_RTX;
12454 for (i = 0; i < n_elts; ++i)
12456 x = XVECEXP (vals, 0, i);
12457 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12458 all_same = false;
12461 if (!all_same)
12462 /* The elements are not all the same. We could handle repeating
12463 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12464 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12465 vdup.i16). */
12466 return NULL_RTX;
12468 /* We can load this constant by using VDUP and a constant in a
12469 single ARM register. This will be cheaper than a vector
12470 load. */
12472 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12473 return gen_rtx_VEC_DUPLICATE (mode, x);
12476 /* Generate code to load VALS, which is a PARALLEL containing only
12477 constants (for vec_init) or CONST_VECTOR, efficiently into a
12478 register. Returns an RTX to copy into the register, or NULL_RTX
12479 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12482 neon_make_constant (rtx vals)
12484 enum machine_mode mode = GET_MODE (vals);
12485 rtx target;
12486 rtx const_vec = NULL_RTX;
12487 int n_elts = GET_MODE_NUNITS (mode);
12488 int n_const = 0;
12489 int i;
12491 if (GET_CODE (vals) == CONST_VECTOR)
12492 const_vec = vals;
12493 else if (GET_CODE (vals) == PARALLEL)
12495 /* A CONST_VECTOR must contain only CONST_INTs and
12496 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12497 Only store valid constants in a CONST_VECTOR. */
12498 for (i = 0; i < n_elts; ++i)
12500 rtx x = XVECEXP (vals, 0, i);
12501 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12502 n_const++;
12504 if (n_const == n_elts)
12505 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12507 else
12508 gcc_unreachable ();
12510 if (const_vec != NULL
12511 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12512 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12513 return const_vec;
12514 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12515 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12516 pipeline cycle; creating the constant takes one or two ARM
12517 pipeline cycles. */
12518 return target;
12519 else if (const_vec != NULL_RTX)
12520 /* Load from constant pool. On Cortex-A8 this takes two cycles
12521 (for either double or quad vectors). We cannot take advantage
12522 of single-cycle VLD1 because we need a PC-relative addressing
12523 mode. */
12524 return const_vec;
12525 else
12526 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12527 We cannot construct an initializer. */
12528 return NULL_RTX;
12531 /* Initialize vector TARGET to VALS. */
12533 void
12534 neon_expand_vector_init (rtx target, rtx vals)
12536 enum machine_mode mode = GET_MODE (target);
12537 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12538 int n_elts = GET_MODE_NUNITS (mode);
12539 int n_var = 0, one_var = -1;
12540 bool all_same = true;
12541 rtx x, mem;
12542 int i;
12544 for (i = 0; i < n_elts; ++i)
12546 x = XVECEXP (vals, 0, i);
12547 if (!CONSTANT_P (x))
12548 ++n_var, one_var = i;
12550 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12551 all_same = false;
12554 if (n_var == 0)
12556 rtx constant = neon_make_constant (vals);
12557 if (constant != NULL_RTX)
12559 emit_move_insn (target, constant);
12560 return;
12564 /* Splat a single non-constant element if we can. */
12565 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12567 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12568 emit_insn (gen_rtx_SET (VOIDmode, target,
12569 gen_rtx_VEC_DUPLICATE (mode, x)));
12570 return;
12573 /* One field is non-constant. Load constant then overwrite varying
12574 field. This is more efficient than using the stack. */
12575 if (n_var == 1)
12577 rtx copy = copy_rtx (vals);
12578 rtx index = GEN_INT (one_var);
12580 /* Load constant part of vector, substitute neighboring value for
12581 varying element. */
12582 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12583 neon_expand_vector_init (target, copy);
12585 /* Insert variable. */
12586 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12587 switch (mode)
12589 case V8QImode:
12590 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12591 break;
12592 case V16QImode:
12593 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12594 break;
12595 case V4HImode:
12596 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12597 break;
12598 case V8HImode:
12599 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12600 break;
12601 case V2SImode:
12602 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12603 break;
12604 case V4SImode:
12605 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12606 break;
12607 case V2SFmode:
12608 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12609 break;
12610 case V4SFmode:
12611 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12612 break;
12613 case V2DImode:
12614 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12615 break;
12616 default:
12617 gcc_unreachable ();
12619 return;
12622 /* Construct the vector in memory one field at a time
12623 and load the whole vector. */
12624 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12625 for (i = 0; i < n_elts; i++)
12626 emit_move_insn (adjust_address_nv (mem, inner_mode,
12627 i * GET_MODE_SIZE (inner_mode)),
12628 XVECEXP (vals, 0, i));
12629 emit_move_insn (target, mem);
12632 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12633 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12634 reported source locations are bogus. */
12636 static void
12637 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12638 const char *err)
12640 HOST_WIDE_INT lane;
12642 gcc_assert (CONST_INT_P (operand));
12644 lane = INTVAL (operand);
12646 if (lane < low || lane >= high)
12647 error (err);
12650 /* Bounds-check lanes. */
12652 void
12653 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12655 bounds_check (operand, low, high, "lane out of range");
12658 /* Bounds-check constants. */
12660 void
12661 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12663 bounds_check (operand, low, high, "constant out of range");
12666 HOST_WIDE_INT
12667 neon_element_bits (enum machine_mode mode)
12669 if (mode == DImode)
12670 return GET_MODE_BITSIZE (mode);
12671 else
12672 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12676 /* Predicates for `match_operand' and `match_operator'. */
12678 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12679 WB is true if full writeback address modes are allowed and is false
12680 if limited writeback address modes (POST_INC and PRE_DEC) are
12681 allowed. */
12684 arm_coproc_mem_operand (rtx op, bool wb)
12686 rtx ind;
12688 /* Reject eliminable registers. */
12689 if (! (reload_in_progress || reload_completed || lra_in_progress)
12690 && ( reg_mentioned_p (frame_pointer_rtx, op)
12691 || reg_mentioned_p (arg_pointer_rtx, op)
12692 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12693 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12694 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12695 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12696 return FALSE;
12698 /* Constants are converted into offsets from labels. */
12699 if (!MEM_P (op))
12700 return FALSE;
12702 ind = XEXP (op, 0);
12704 if (reload_completed
12705 && (GET_CODE (ind) == LABEL_REF
12706 || (GET_CODE (ind) == CONST
12707 && GET_CODE (XEXP (ind, 0)) == PLUS
12708 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12709 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12710 return TRUE;
12712 /* Match: (mem (reg)). */
12713 if (REG_P (ind))
12714 return arm_address_register_rtx_p (ind, 0);
12716 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12717 acceptable in any case (subject to verification by
12718 arm_address_register_rtx_p). We need WB to be true to accept
12719 PRE_INC and POST_DEC. */
12720 if (GET_CODE (ind) == POST_INC
12721 || GET_CODE (ind) == PRE_DEC
12722 || (wb
12723 && (GET_CODE (ind) == PRE_INC
12724 || GET_CODE (ind) == POST_DEC)))
12725 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12727 if (wb
12728 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12729 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12730 && GET_CODE (XEXP (ind, 1)) == PLUS
12731 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12732 ind = XEXP (ind, 1);
12734 /* Match:
12735 (plus (reg)
12736 (const)). */
12737 if (GET_CODE (ind) == PLUS
12738 && REG_P (XEXP (ind, 0))
12739 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12740 && CONST_INT_P (XEXP (ind, 1))
12741 && INTVAL (XEXP (ind, 1)) > -1024
12742 && INTVAL (XEXP (ind, 1)) < 1024
12743 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12744 return TRUE;
12746 return FALSE;
12749 /* Return TRUE if OP is a memory operand which we can load or store a vector
12750 to/from. TYPE is one of the following values:
12751 0 - Vector load/store (vldr)
12752 1 - Core registers (ldm)
12753 2 - Element/structure loads (vld1)
12756 neon_vector_mem_operand (rtx op, int type, bool strict)
12758 rtx ind;
12760 /* Reject eliminable registers. */
12761 if (! (reload_in_progress || reload_completed)
12762 && ( reg_mentioned_p (frame_pointer_rtx, op)
12763 || reg_mentioned_p (arg_pointer_rtx, op)
12764 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12765 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12766 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12767 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12768 return !strict;
12770 /* Constants are converted into offsets from labels. */
12771 if (!MEM_P (op))
12772 return FALSE;
12774 ind = XEXP (op, 0);
12776 if (reload_completed
12777 && (GET_CODE (ind) == LABEL_REF
12778 || (GET_CODE (ind) == CONST
12779 && GET_CODE (XEXP (ind, 0)) == PLUS
12780 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12781 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12782 return TRUE;
12784 /* Match: (mem (reg)). */
12785 if (REG_P (ind))
12786 return arm_address_register_rtx_p (ind, 0);
12788 /* Allow post-increment with Neon registers. */
12789 if ((type != 1 && GET_CODE (ind) == POST_INC)
12790 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12791 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12793 /* Allow post-increment by register for VLDn */
12794 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12795 && GET_CODE (XEXP (ind, 1)) == PLUS
12796 && REG_P (XEXP (XEXP (ind, 1), 1)))
12797 return true;
12799 /* Match:
12800 (plus (reg)
12801 (const)). */
12802 if (type == 0
12803 && GET_CODE (ind) == PLUS
12804 && REG_P (XEXP (ind, 0))
12805 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12806 && CONST_INT_P (XEXP (ind, 1))
12807 && INTVAL (XEXP (ind, 1)) > -1024
12808 /* For quad modes, we restrict the constant offset to be slightly less
12809 than what the instruction format permits. We have no such constraint
12810 on double mode offsets. (This must match arm_legitimate_index_p.) */
12811 && (INTVAL (XEXP (ind, 1))
12812 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12813 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12814 return TRUE;
12816 return FALSE;
12819 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12820 type. */
12822 neon_struct_mem_operand (rtx op)
12824 rtx ind;
12826 /* Reject eliminable registers. */
12827 if (! (reload_in_progress || reload_completed)
12828 && ( reg_mentioned_p (frame_pointer_rtx, op)
12829 || reg_mentioned_p (arg_pointer_rtx, op)
12830 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12831 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12832 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12833 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12834 return FALSE;
12836 /* Constants are converted into offsets from labels. */
12837 if (!MEM_P (op))
12838 return FALSE;
12840 ind = XEXP (op, 0);
12842 if (reload_completed
12843 && (GET_CODE (ind) == LABEL_REF
12844 || (GET_CODE (ind) == CONST
12845 && GET_CODE (XEXP (ind, 0)) == PLUS
12846 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12847 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12848 return TRUE;
12850 /* Match: (mem (reg)). */
12851 if (REG_P (ind))
12852 return arm_address_register_rtx_p (ind, 0);
12854 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12855 if (GET_CODE (ind) == POST_INC
12856 || GET_CODE (ind) == PRE_DEC)
12857 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12859 return FALSE;
12862 /* Return true if X is a register that will be eliminated later on. */
12864 arm_eliminable_register (rtx x)
12866 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12867 || REGNO (x) == ARG_POINTER_REGNUM
12868 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12869 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12872 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12873 coprocessor registers. Otherwise return NO_REGS. */
12875 enum reg_class
12876 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12878 if (mode == HFmode)
12880 if (!TARGET_NEON_FP16)
12881 return GENERAL_REGS;
12882 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12883 return NO_REGS;
12884 return GENERAL_REGS;
12887 /* The neon move patterns handle all legitimate vector and struct
12888 addresses. */
12889 if (TARGET_NEON
12890 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12891 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12892 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12893 || VALID_NEON_STRUCT_MODE (mode)))
12894 return NO_REGS;
12896 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12897 return NO_REGS;
12899 return GENERAL_REGS;
12902 /* Values which must be returned in the most-significant end of the return
12903 register. */
12905 static bool
12906 arm_return_in_msb (const_tree valtype)
12908 return (TARGET_AAPCS_BASED
12909 && BYTES_BIG_ENDIAN
12910 && (AGGREGATE_TYPE_P (valtype)
12911 || TREE_CODE (valtype) == COMPLEX_TYPE
12912 || FIXED_POINT_TYPE_P (valtype)));
12915 /* Return TRUE if X references a SYMBOL_REF. */
12917 symbol_mentioned_p (rtx x)
12919 const char * fmt;
12920 int i;
12922 if (GET_CODE (x) == SYMBOL_REF)
12923 return 1;
12925 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12926 are constant offsets, not symbols. */
12927 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12928 return 0;
12930 fmt = GET_RTX_FORMAT (GET_CODE (x));
12932 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12934 if (fmt[i] == 'E')
12936 int j;
12938 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12939 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12940 return 1;
12942 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12943 return 1;
12946 return 0;
12949 /* Return TRUE if X references a LABEL_REF. */
12951 label_mentioned_p (rtx x)
12953 const char * fmt;
12954 int i;
12956 if (GET_CODE (x) == LABEL_REF)
12957 return 1;
12959 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12960 instruction, but they are constant offsets, not symbols. */
12961 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12962 return 0;
12964 fmt = GET_RTX_FORMAT (GET_CODE (x));
12965 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12967 if (fmt[i] == 'E')
12969 int j;
12971 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12972 if (label_mentioned_p (XVECEXP (x, i, j)))
12973 return 1;
12975 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12976 return 1;
12979 return 0;
12983 tls_mentioned_p (rtx x)
12985 switch (GET_CODE (x))
12987 case CONST:
12988 return tls_mentioned_p (XEXP (x, 0));
12990 case UNSPEC:
12991 if (XINT (x, 1) == UNSPEC_TLS)
12992 return 1;
12994 default:
12995 return 0;
12999 /* Must not copy any rtx that uses a pc-relative address. */
13001 static int
13002 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13004 if (GET_CODE (*x) == UNSPEC
13005 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13006 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13007 return 1;
13008 return 0;
13011 static bool
13012 arm_cannot_copy_insn_p (rtx insn)
13014 /* The tls call insn cannot be copied, as it is paired with a data
13015 word. */
13016 if (recog_memoized (insn) == CODE_FOR_tlscall)
13017 return true;
13019 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13022 enum rtx_code
13023 minmax_code (rtx x)
13025 enum rtx_code code = GET_CODE (x);
13027 switch (code)
13029 case SMAX:
13030 return GE;
13031 case SMIN:
13032 return LE;
13033 case UMIN:
13034 return LEU;
13035 case UMAX:
13036 return GEU;
13037 default:
13038 gcc_unreachable ();
13042 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13044 bool
13045 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13046 int *mask, bool *signed_sat)
13048 /* The high bound must be a power of two minus one. */
13049 int log = exact_log2 (INTVAL (hi_bound) + 1);
13050 if (log == -1)
13051 return false;
13053 /* The low bound is either zero (for usat) or one less than the
13054 negation of the high bound (for ssat). */
13055 if (INTVAL (lo_bound) == 0)
13057 if (mask)
13058 *mask = log;
13059 if (signed_sat)
13060 *signed_sat = false;
13062 return true;
13065 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13067 if (mask)
13068 *mask = log + 1;
13069 if (signed_sat)
13070 *signed_sat = true;
13072 return true;
13075 return false;
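/* Illustrative sketch only (not part of GCC): the same bound matching on
   plain integers.  For example, bounds [0, 255] match an 8-bit USAT
   (*MASK = 8, unsigned) and bounds [-128, 127] match an 8-bit SSAT
   (*MASK = 8, signed).  */
static int
example_sat_bounds (long lo, long hi, int *mask, int *is_signed)
{
  int log = -1, k;

  /* exact_log2 (hi + 1) done by hand: HI must be a power of two minus one.  */
  for (k = 0; k < (int) (8 * sizeof (long)) - 1; k++)
    if (hi == (1L << k) - 1)
      log = k;
  if (log == -1)
    return 0;

  if (lo == 0)
    {
      *mask = log;
      *is_signed = 0;
      return 1;
    }
  if (lo == -hi - 1)
    {
      *mask = log + 1;
      *is_signed = 1;
      return 1;
    }
  return 0;
}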
13078 /* Return 1 if memory locations are adjacent. */
13080 adjacent_mem_locations (rtx a, rtx b)
13082 /* We don't guarantee to preserve the order of these memory refs. */
13083 if (volatile_refs_p (a) || volatile_refs_p (b))
13084 return 0;
13086 if ((REG_P (XEXP (a, 0))
13087 || (GET_CODE (XEXP (a, 0)) == PLUS
13088 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13089 && (REG_P (XEXP (b, 0))
13090 || (GET_CODE (XEXP (b, 0)) == PLUS
13091 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13093 HOST_WIDE_INT val0 = 0, val1 = 0;
13094 rtx reg0, reg1;
13095 int val_diff;
13097 if (GET_CODE (XEXP (a, 0)) == PLUS)
13099 reg0 = XEXP (XEXP (a, 0), 0);
13100 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13102 else
13103 reg0 = XEXP (a, 0);
13105 if (GET_CODE (XEXP (b, 0)) == PLUS)
13107 reg1 = XEXP (XEXP (b, 0), 0);
13108 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13110 else
13111 reg1 = XEXP (b, 0);
13113 /* Don't accept any offset that will require multiple
13114 instructions to handle, since this would cause the
13115 arith_adjacentmem pattern to output an overlong sequence. */
13116 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13117 return 0;
13119 /* Don't allow an eliminable register: register elimination can make
13120 the offset too large. */
13121 if (arm_eliminable_register (reg0))
13122 return 0;
13124 val_diff = val1 - val0;
13126 if (arm_ld_sched)
13128 /* If the target has load delay slots, then there's no benefit
13129 to using an ldm instruction unless the offset is zero and
13130 we are optimizing for size. */
13131 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13132 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13133 && (val_diff == 4 || val_diff == -4));
13136 return ((REGNO (reg0) == REGNO (reg1))
13137 && (val_diff == 4 || val_diff == -4));
13140 return 0;
13143 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13144 for load operations, false for store operations. CONSECUTIVE is true
13145 if the register numbers in the operation must be consecutive in the register
13146 bank. RETURN_PC is true if the value is to be loaded into the PC.
13147 The pattern we are trying to match for load is:
13148 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13149 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13152 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13154 where
13155 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13156 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13157 3. If consecutive is TRUE, then for kth register being loaded,
13158 REGNO (R_dk) = REGNO (R_d0) + k.
13159 The pattern for store is similar. */
13160 bool
13161 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13162 bool consecutive, bool return_pc)
13164 HOST_WIDE_INT count = XVECLEN (op, 0);
13165 rtx reg, mem, addr;
13166 unsigned regno;
13167 unsigned first_regno;
13168 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13169 rtx elt;
13170 bool addr_reg_in_reglist = false;
13171 bool update = false;
13172 int reg_increment;
13173 int offset_adj;
13174 int regs_per_val;
13176 /* If not in SImode, then registers must be consecutive
13177 (e.g., VLDM instructions for DFmode). */
13178 gcc_assert ((mode == SImode) || consecutive);
13179 /* Setting return_pc for stores is illegal. */
13180 gcc_assert (!return_pc || load);
13182 /* Set up the increments and the regs per val based on the mode. */
13183 reg_increment = GET_MODE_SIZE (mode);
13184 regs_per_val = reg_increment / 4;
13185 offset_adj = return_pc ? 1 : 0;
13187 if (count <= 1
13188 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13189 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13190 return false;
13192 /* Check if this is a write-back. */
13193 elt = XVECEXP (op, 0, offset_adj);
13194 if (GET_CODE (SET_SRC (elt)) == PLUS)
13196 i++;
13197 base = 1;
13198 update = true;
13200 /* The offset adjustment must be the number of registers being
13201 popped times the size of a single register. */
13202 if (!REG_P (SET_DEST (elt))
13203 || !REG_P (XEXP (SET_SRC (elt), 0))
13204 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13205 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13206 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13207 ((count - 1 - offset_adj) * reg_increment))
13208 return false;
13211 i = i + offset_adj;
13212 base = base + offset_adj;
13213 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13214 success depends on the type: VLDM can do just one reg,
13215 LDM must do at least two. */
13216 if ((count <= i) && (mode == SImode))
13217 return false;
13219 elt = XVECEXP (op, 0, i - 1);
13220 if (GET_CODE (elt) != SET)
13221 return false;
13223 if (load)
13225 reg = SET_DEST (elt);
13226 mem = SET_SRC (elt);
13228 else
13230 reg = SET_SRC (elt);
13231 mem = SET_DEST (elt);
13234 if (!REG_P (reg) || !MEM_P (mem))
13235 return false;
13237 regno = REGNO (reg);
13238 first_regno = regno;
13239 addr = XEXP (mem, 0);
13240 if (GET_CODE (addr) == PLUS)
13242 if (!CONST_INT_P (XEXP (addr, 1)))
13243 return false;
13245 offset = INTVAL (XEXP (addr, 1));
13246 addr = XEXP (addr, 0);
13249 if (!REG_P (addr))
13250 return false;
13252 /* Don't allow SP to be loaded unless it is also the base register. It
13253 guarantees that SP is reset correctly when an LDM instruction
13254 is interrupted. Otherwise, we might end up with a corrupt stack. */
13255 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13256 return false;
13258 for (; i < count; i++)
13260 elt = XVECEXP (op, 0, i);
13261 if (GET_CODE (elt) != SET)
13262 return false;
13264 if (load)
13266 reg = SET_DEST (elt);
13267 mem = SET_SRC (elt);
13269 else
13271 reg = SET_SRC (elt);
13272 mem = SET_DEST (elt);
13275 if (!REG_P (reg)
13276 || GET_MODE (reg) != mode
13277 || REGNO (reg) <= regno
13278 || (consecutive
13279 && (REGNO (reg) !=
13280 (unsigned int) (first_regno + regs_per_val * (i - base))))
13281 /* Don't allow SP to be loaded unless it is also the base register. It
13282 guarantees that SP is reset correctly when an LDM instruction
13283 is interrupted. Otherwise, we might end up with a corrupt stack. */
13284 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13285 || !MEM_P (mem)
13286 || GET_MODE (mem) != mode
13287 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13288 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13289 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13290 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13291 offset + (i - base) * reg_increment))
13292 && (!REG_P (XEXP (mem, 0))
13293 || offset + (i - base) * reg_increment != 0)))
13294 return false;
13296 regno = REGNO (reg);
13297 if (regno == REGNO (addr))
13298 addr_reg_in_reglist = true;
13301 if (load)
13303 if (update && addr_reg_in_reglist)
13304 return false;
13306 /* For Thumb-1, the address register is always modified - either by write-back
13307 or by an explicit load. If the pattern does not describe an update,
13308 then the address register must be in the list of loaded registers. */
13309 if (TARGET_THUMB1)
13310 return update || addr_reg_in_reglist;
13313 return true;
13316 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13317 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13318 instruction. ADD_OFFSET is nonzero if the base address register needs
13319 to be modified with an add instruction before we can use it. */
13321 static bool
13322 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13323 int nops, HOST_WIDE_INT add_offset)
13325 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13326 if the offset isn't small enough. The reason 2 ldrs are faster
13327 is because these ARMs are able to do more than one cache access
13328 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13329 whilst the ARM8 has a double bandwidth cache. This means that
13330 these cores can do both an instruction fetch and a data fetch in
13331 a single cycle, so the trick of calculating the address into a
13332 scratch register (one of the result regs) and then doing a load
13333 multiple actually becomes slower (and no smaller in code size).
13334 That is the transformation
13336 ldr rd1, [rbase + offset]
13337 ldr rd2, [rbase + offset + 4]
to
13341 add rd1, rbase, offset
13342 ldmia rd1, {rd1, rd2}
13344 produces worse code -- '3 cycles + any stalls on rd2' instead of
13345 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13346 access per cycle, the first sequence could never complete in less
13347 than 6 cycles, whereas the ldm sequence would only take 5 and
13348 would make better use of sequential accesses if not hitting the
13349 cache.
13351 We cheat here and test 'arm_ld_sched' which we currently know to
13352 only be true for the ARM8, ARM9 and StrongARM. If this ever
13353 changes, then the test below needs to be reworked. */
13354 if (nops == 2 && arm_ld_sched && add_offset != 0)
13355 return false;
13357 /* XScale has load-store double instructions, but they have stricter
13358 alignment requirements than load-store multiple, so we cannot
13359 use them.
13361 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13362 the pipeline until completion.
13364 NREGS CYCLES
1 3
2 4
3 5
4 6
13370 An ldr instruction takes 1-3 cycles, but does not block the
13371 pipeline.
13373 NREGS CYCLES
13374 1 1-3
13375 2 2-6
13376 3 3-9
13377 4 4-12
13379 Best case ldr will always win. However, the more ldr instructions
13380 we issue, the less likely we are to be able to schedule them well.
13381 Using ldr instructions also increases code size.
13383 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13384 for counts of 3 or 4 regs. */
13385 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13386 return false;
13387 return true;
13390 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13391 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13392 an array ORDER which describes the sequence to use when accessing the
13393 offsets that produces an ascending order. In this sequence, each
13394 offset must be larger by exactly 4 than the previous one. ORDER[0]
13395 must have been filled in with the lowest offset by the caller.
13396 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13397 we use to verify that ORDER produces an ascending order of registers.
13398 Return true if it was possible to construct such an order, false if
13399 not. */
13401 static bool
13402 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13403 int *unsorted_regs)
13405 int i;
13406 for (i = 1; i < nops; i++)
13408 int j;
13410 order[i] = order[i - 1];
13411 for (j = 0; j < nops; j++)
13412 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13414 /* We must find exactly one offset that is higher than the
13415 previous one by 4. */
13416 if (order[i] != order[i - 1])
13417 return false;
13418 order[i] = j;
13420 if (order[i] == order[i - 1])
13421 return false;
13422 /* The register numbers must be ascending. */
13423 if (unsorted_regs != NULL
13424 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13425 return false;
13427 return true;
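/* Illustrative sketch only (not part of GCC): the same ordering check on
   plain arrays.  ORDER[0] is assumed to already hold the index of the
   smallest offset; each later slot must name the unique offset exactly 4
   larger than the previous one.  E.g. offsets {8, 0, 4, 12} yield
   order {1, 2, 0, 3}.  */
static int
example_offset_order (int nops, const long *offsets, int *order)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    /* Exactly one candidate is allowed.  */
	    if (order[i] != order[i - 1])
	      return 0;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return 0;
    }
  return 1;
}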
13430 /* Used to determine in a peephole whether a sequence of load
13431 instructions can be changed into a load-multiple instruction.
13432 NOPS is the number of separate load instructions we are examining. The
13433 first NOPS entries in OPERANDS are the destination registers, the
13434 next NOPS entries are memory operands. If this function is
13435 successful, *BASE is set to the common base register of the memory
13436 accesses; *LOAD_OFFSET is set to the first memory location's offset
13437 from that base register.
13438 REGS is an array filled in with the destination register numbers.
13439 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13440 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13441 the sequence of registers in REGS matches the loads from ascending memory
13442 locations, and the function verifies that the register numbers are
13443 themselves ascending. If CHECK_REGS is false, the register numbers
13444 are stored in the order they are found in the operands. */
13445 static int
13446 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13447 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13449 int unsorted_regs[MAX_LDM_STM_OPS];
13450 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13451 int order[MAX_LDM_STM_OPS];
13452 rtx base_reg_rtx = NULL;
13453 int base_reg = -1;
13454 int i, ldm_case;
13456 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13457 easily extended if required. */
13458 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13460 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13462 /* Loop over the operands and check that the memory references are
13463 suitable (i.e. immediate offsets from the same base register). At
13464 the same time, extract the target register, and the memory
13465 offsets. */
13466 for (i = 0; i < nops; i++)
13468 rtx reg;
13469 rtx offset;
13471 /* Convert a subreg of a mem into the mem itself. */
13472 if (GET_CODE (operands[nops + i]) == SUBREG)
13473 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13475 gcc_assert (MEM_P (operands[nops + i]));
13477 /* Don't reorder volatile memory references; it doesn't seem worth
13478 looking for the case where the order is ok anyway. */
13479 if (MEM_VOLATILE_P (operands[nops + i]))
13480 return 0;
13482 offset = const0_rtx;
13484 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13485 || (GET_CODE (reg) == SUBREG
13486 && REG_P (reg = SUBREG_REG (reg))))
13487 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13488 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13489 || (GET_CODE (reg) == SUBREG
13490 && REG_P (reg = SUBREG_REG (reg))))
13491 && (CONST_INT_P (offset
13492 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13494 if (i == 0)
13496 base_reg = REGNO (reg);
13497 base_reg_rtx = reg;
13498 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13499 return 0;
13501 else if (base_reg != (int) REGNO (reg))
13502 /* Not addressed from the same base register. */
13503 return 0;
13505 unsorted_regs[i] = (REG_P (operands[i])
13506 ? REGNO (operands[i])
13507 : REGNO (SUBREG_REG (operands[i])));
13509 /* If it isn't an integer register, or if it overwrites the
13510 base register but isn't the last insn in the list, then
13511 we can't do this. */
13512 if (unsorted_regs[i] < 0
13513 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13514 || unsorted_regs[i] > 14
13515 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13516 return 0;
13518 /* Don't allow SP to be loaded unless it is also the base
13519 register. It guarantees that SP is reset correctly when
13520 an LDM instruction is interrupted. Otherwise, we might
13521 end up with a corrupt stack. */
13522 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13523 return 0;
13525 unsorted_offsets[i] = INTVAL (offset);
13526 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13527 order[0] = i;
13529 else
13530 /* Not a suitable memory address. */
13531 return 0;
13534 /* All the useful information has now been extracted from the
13535 operands into unsorted_regs and unsorted_offsets; additionally,
13536 order[0] has been set to the lowest offset in the list. Sort
13537 the offsets into order, verifying that they are adjacent, and
13538 check that the register numbers are ascending. */
13539 if (!compute_offset_order (nops, unsorted_offsets, order,
13540 check_regs ? unsorted_regs : NULL))
13541 return 0;
13543 if (saved_order)
13544 memcpy (saved_order, order, sizeof order);
13546 if (base)
13548 *base = base_reg;
13550 for (i = 0; i < nops; i++)
13551 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13553 *load_offset = unsorted_offsets[order[0]];
13556 if (TARGET_THUMB1
13557 && !peep2_reg_dead_p (nops, base_reg_rtx))
13558 return 0;
13560 if (unsorted_offsets[order[0]] == 0)
13561 ldm_case = 1; /* ldmia */
13562 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13563 ldm_case = 2; /* ldmib */
13564 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13565 ldm_case = 3; /* ldmda */
13566 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13567 ldm_case = 4; /* ldmdb */
13568 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13569 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13570 ldm_case = 5;
13571 else
13572 return 0;
13574 if (!multiple_operation_profitable_p (false, nops,
13575 ldm_case == 5
13576 ? unsorted_offsets[order[0]] : 0))
13577 return 0;
13579 return ldm_case;
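/* The classification above maps directly onto the ARM load-multiple
   addressing variants.  A minimal illustrative sketch of that mapping
   (hypothetical helper, not part of the compiler proper):  */
static const char *
ldm_case_name_sketch (int ldm_case)
{
  switch (ldm_case)
    {
    case 1: return "ldmia";                 /* Lowest offset is 0.  */
    case 2: return "ldmib";                 /* Lowest offset is 4 (ARM only).  */
    case 3: return "ldmda";                 /* Highest offset is 0 (ARM only).  */
    case 4: return "ldmdb";                 /* Highest offset is -4.  */
    case 5: return "add base, then ldmia";  /* Base adjusted by a separate add.  */
    default: return "not a load-multiple candidate";
    }
}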
13582 /* Used to determine in a peephole whether a sequence of store instructions can
13583 be changed into a store-multiple instruction.
13584 NOPS is the number of separate store instructions we are examining.
13585 NOPS_TOTAL is the total number of instructions recognized by the peephole
13586 pattern.
13587 The first NOPS entries in OPERANDS are the source registers, the next
13588 NOPS entries are memory operands. If this function is successful, *BASE is
13589 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13590 to the first memory location's offset from that base register. REGS is an
13591 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13592 likewise filled with the corresponding rtx's.
13593 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13594 numbers to an ascending order of stores.
13595 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13596 from ascending memory locations, and the function verifies that the register
13597 numbers are themselves ascending. If CHECK_REGS is false, the register
13598 numbers are stored in the order they are found in the operands. */
13599 static int
13600 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13601 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13602 HOST_WIDE_INT *load_offset, bool check_regs)
13604 int unsorted_regs[MAX_LDM_STM_OPS];
13605 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13606 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13607 int order[MAX_LDM_STM_OPS];
13608 int base_reg = -1;
13609 rtx base_reg_rtx = NULL;
13610 int i, stm_case;
13612 /* Write back of base register is currently only supported for Thumb 1. */
13613 int base_writeback = TARGET_THUMB1;
13615 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13616 easily extended if required. */
13617 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13619 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13621 /* Loop over the operands and check that the memory references are
13622 suitable (i.e. immediate offsets from the same base register). At
13623 the same time, extract the target register, and the memory
13624 offsets. */
13625 for (i = 0; i < nops; i++)
13627 rtx reg;
13628 rtx offset;
13630 /* Convert a subreg of a mem into the mem itself. */
13631 if (GET_CODE (operands[nops + i]) == SUBREG)
13632 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13634 gcc_assert (MEM_P (operands[nops + i]));
13636 /* Don't reorder volatile memory references; it doesn't seem worth
13637 looking for the case where the order is ok anyway. */
13638 if (MEM_VOLATILE_P (operands[nops + i]))
13639 return 0;
13641 offset = const0_rtx;
13643 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13644 || (GET_CODE (reg) == SUBREG
13645 && REG_P (reg = SUBREG_REG (reg))))
13646 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13647 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13648 || (GET_CODE (reg) == SUBREG
13649 && REG_P (reg = SUBREG_REG (reg))))
13650 && (CONST_INT_P (offset
13651 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13653 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13654 ? operands[i] : SUBREG_REG (operands[i]));
13655 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13657 if (i == 0)
13659 base_reg = REGNO (reg);
13660 base_reg_rtx = reg;
13661 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13662 return 0;
13664 else if (base_reg != (int) REGNO (reg))
13665 /* Not addressed from the same base register. */
13666 return 0;
13668 /* If it isn't an integer register, then we can't do this. */
13669 if (unsorted_regs[i] < 0
13670 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13671 /* The effects are unpredictable if the base register is
13672 both updated and stored. */
13673 || (base_writeback && unsorted_regs[i] == base_reg)
13674 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13675 || unsorted_regs[i] > 14)
13676 return 0;
13678 unsorted_offsets[i] = INTVAL (offset);
13679 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13680 order[0] = i;
13682 else
13683 /* Not a suitable memory address. */
13684 return 0;
13687 /* All the useful information has now been extracted from the
13688 operands into unsorted_regs and unsorted_offsets; additionally,
13689 order[0] has been set to the lowest offset in the list. Sort
13690 the offsets into order, verifying that they are adjacent, and
13691 check that the register numbers are ascending. */
13692 if (!compute_offset_order (nops, unsorted_offsets, order,
13693 check_regs ? unsorted_regs : NULL))
13694 return 0;
13696 if (saved_order)
13697 memcpy (saved_order, order, sizeof order);
13699 if (base)
13701 *base = base_reg;
13703 for (i = 0; i < nops; i++)
13705 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13706 if (reg_rtxs)
13707 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13710 *load_offset = unsorted_offsets[order[0]];
13713 if (TARGET_THUMB1
13714 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13715 return 0;
13717 if (unsorted_offsets[order[0]] == 0)
13718 stm_case = 1; /* stmia */
13719 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13720 stm_case = 2; /* stmib */
13721 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13722 stm_case = 3; /* stmda */
13723 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13724 stm_case = 4; /* stmdb */
13725 else
13726 return 0;
13728 if (!multiple_operation_profitable_p (false, nops, 0))
13729 return 0;
13731 return stm_case;
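/* A minimal usage sketch, mirroring how gen_stm_seq further down drives this
   routine; the wrapper name is hypothetical and illustrative only.  */
static bool
stm_candidate_p_sketch (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  int base_reg;
  HOST_WIDE_INT offset;

  /* Sources are operands[0..nops-1], their MEMs are operands[nops..2*nops-1];
     request the ascending-register check, as gen_stm_seq does.  */
  return store_multiple_sequence (operands, nops, nops, regs, NULL,
                                  mem_order, &base_reg, &offset, true) != 0;
}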
13734 /* Routines for use in generating RTL. */
13736 /* Generate a load-multiple instruction. COUNT is the number of loads in
13737 the instruction; REGS and MEMS are arrays containing the operands.
13738 BASEREG is the base register to be used in addressing the memory operands.
13739 WBACK_OFFSET is nonzero if the instruction should update the base
13740 register. */
13742 static rtx
13743 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13744 HOST_WIDE_INT wback_offset)
13746 int i = 0, j;
13747 rtx result;
13749 if (!multiple_operation_profitable_p (false, count, 0))
13751 rtx seq;
13753 start_sequence ();
13755 for (i = 0; i < count; i++)
13756 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13758 if (wback_offset != 0)
13759 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13761 seq = get_insns ();
13762 end_sequence ();
13764 return seq;
13767 result = gen_rtx_PARALLEL (VOIDmode,
13768 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13769 if (wback_offset != 0)
13771 XVECEXP (result, 0, 0)
13772 = gen_rtx_SET (VOIDmode, basereg,
13773 plus_constant (Pmode, basereg, wback_offset));
13774 i = 1;
13775 count++;
13778 for (j = 0; i < count; i++, j++)
13779 XVECEXP (result, 0, i)
13780 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13782 return result;
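/* A minimal sketch (hypothetical helper, illustrative only) of how a caller
   prepares the MEMS array for the routine above; arm_gen_multiple_op further
   down does the same thing in general form.  */
static rtx
ldm2_with_writeback_sketch (int reg0, int reg1, rtx basereg, rtx basemem)
{
  int regs[2] = { reg0, reg1 };
  rtx mems[2];
  rtx addr0 = plus_constant (Pmode, basereg, 0);
  rtx addr1 = plus_constant (Pmode, basereg, 4);

  mems[0] = adjust_automodify_address_nv (basemem, SImode, addr0, 0);
  mems[1] = adjust_automodify_address_nv (basemem, SImode, addr1, 4);

  /* A WBACK_OFFSET of 8 asks for the base register to be advanced past the
     two words that were transferred.  */
  return arm_gen_load_multiple_1 (2, regs, mems, basereg, 8);
}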
13785 /* Generate a store-multiple instruction. COUNT is the number of stores in
13786 the instruction; REGS and MEMS are arrays containing the operands.
13787 BASEREG is the base register to be used in addressing the memory operands.
13788 WBACK_OFFSET is nonzero if the instruction should update the base
13789 register. */
13791 static rtx
13792 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13793 HOST_WIDE_INT wback_offset)
13795 int i = 0, j;
13796 rtx result;
13798 if (GET_CODE (basereg) == PLUS)
13799 basereg = XEXP (basereg, 0);
13801 if (!multiple_operation_profitable_p (false, count, 0))
13803 rtx seq;
13805 start_sequence ();
13807 for (i = 0; i < count; i++)
13808 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13810 if (wback_offset != 0)
13811 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13813 seq = get_insns ();
13814 end_sequence ();
13816 return seq;
13819 result = gen_rtx_PARALLEL (VOIDmode,
13820 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13821 if (wback_offset != 0)
13823 XVECEXP (result, 0, 0)
13824 = gen_rtx_SET (VOIDmode, basereg,
13825 plus_constant (Pmode, basereg, wback_offset));
13826 i = 1;
13827 count++;
13830 for (j = 0; i < count; i++, j++)
13831 XVECEXP (result, 0, i)
13832 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13834 return result;
13837 /* Generate either a load-multiple or a store-multiple instruction. This
13838 function can be used in situations where we can start with a single MEM
13839 rtx and adjust its address upwards.
13840 COUNT is the number of operations in the instruction, not counting a
13841 possible update of the base register. REGS is an array containing the
13842 register operands.
13843 BASEREG is the base register to be used in addressing the memory operands,
13844 which are constructed from BASEMEM.
13845 WRITE_BACK specifies whether the generated instruction should include an
13846 update of the base register.
13847 OFFSETP is used to pass an offset to and from this function; this offset
13848 is not used when constructing the address (instead BASEMEM should have an
13849 appropriate offset in its address), it is used only for setting
13850 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13852 static rtx
13853 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13854 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13856 rtx mems[MAX_LDM_STM_OPS];
13857 HOST_WIDE_INT offset = *offsetp;
13858 int i;
13860 gcc_assert (count <= MAX_LDM_STM_OPS);
13862 if (GET_CODE (basereg) == PLUS)
13863 basereg = XEXP (basereg, 0);
13865 for (i = 0; i < count; i++)
13867 rtx addr = plus_constant (Pmode, basereg, i * 4);
13868 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13869 offset += 4;
13872 if (write_back)
13873 *offsetp = offset;
13875 if (is_load)
13876 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13877 write_back ? 4 * count : 0);
13878 else
13879 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13880 write_back ? 4 * count : 0);
13883 rtx
13884 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13885 rtx basemem, HOST_WIDE_INT *offsetp)
13887 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13888 offsetp);
13891 rtx
13892 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13893 rtx basemem, HOST_WIDE_INT *offsetp)
13895 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13896 offsetp);
13899 /* Called from a peephole2 expander to turn a sequence of loads into an
13900 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13901 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13902 is true if we may reorder the registers because their subsequent uses are
13903 commutative.
13904 Returns true iff we could generate a new instruction. */
13906 bool
13907 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13909 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13910 rtx mems[MAX_LDM_STM_OPS];
13911 int i, j, base_reg;
13912 rtx base_reg_rtx;
13913 HOST_WIDE_INT offset;
13914 int write_back = FALSE;
13915 int ldm_case;
13916 rtx addr;
13918 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13919 &base_reg, &offset, !sort_regs);
13921 if (ldm_case == 0)
13922 return false;
13924 if (sort_regs)
13925 for (i = 0; i < nops - 1; i++)
13926 for (j = i + 1; j < nops; j++)
13927 if (regs[i] > regs[j])
13929 int t = regs[i];
13930 regs[i] = regs[j];
13931 regs[j] = t;
13933 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13935 if (TARGET_THUMB1)
13937 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13938 gcc_assert (ldm_case == 1 || ldm_case == 5);
13939 write_back = TRUE;
13942 if (ldm_case == 5)
13944 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13945 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13946 offset = 0;
13947 if (!TARGET_THUMB1)
13949 base_reg = regs[0];
13950 base_reg_rtx = newbase;
13954 for (i = 0; i < nops; i++)
13956 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13957 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13958 SImode, addr, 0);
13960 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13961 write_back ? offset + i * 4 : 0));
13962 return true;
13965 /* Called from a peephole2 expander to turn a sequence of stores into an
13966 STM instruction. OPERANDS are the operands found by the peephole matcher;
13967 NOPS indicates how many separate stores we are trying to combine.
13968 Returns true iff we could generate a new instruction. */
13970 bool
13971 gen_stm_seq (rtx *operands, int nops)
13973 int i;
13974 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13975 rtx mems[MAX_LDM_STM_OPS];
13976 int base_reg;
13977 rtx base_reg_rtx;
13978 HOST_WIDE_INT offset;
13979 int write_back = FALSE;
13980 int stm_case;
13981 rtx addr;
13982 bool base_reg_dies;
13984 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13985 mem_order, &base_reg, &offset, true);
13987 if (stm_case == 0)
13988 return false;
13990 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13992 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13993 if (TARGET_THUMB1)
13995 gcc_assert (base_reg_dies);
13996 write_back = TRUE;
13999 if (stm_case == 5)
14001 gcc_assert (base_reg_dies);
14002 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14003 offset = 0;
14006 addr = plus_constant (Pmode, base_reg_rtx, offset);
14008 for (i = 0; i < nops; i++)
14010 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14011 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14012 SImode, addr, 0);
14014 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14015 write_back ? offset + i * 4 : 0));
14016 return true;
14019 /* Called from a peephole2 expander to turn a sequence of stores that are
14020 preceded by constant loads into an STM instruction. OPERANDS are the
14021 operands found by the peephole matcher; NOPS indicates how many
14022 separate stores we are trying to combine; there are 2 * NOPS
14023 instructions in the peephole.
14024 Returns true iff we could generate a new instruction. */
14026 bool
14027 gen_const_stm_seq (rtx *operands, int nops)
14029 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14030 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14031 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14032 rtx mems[MAX_LDM_STM_OPS];
14033 int base_reg;
14034 rtx base_reg_rtx;
14035 HOST_WIDE_INT offset;
14036 int write_back = FALSE;
14037 int stm_case;
14038 rtx addr;
14039 bool base_reg_dies;
14040 int i, j;
14041 HARD_REG_SET allocated;
14043 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14044 mem_order, &base_reg, &offset, false);
14046 if (stm_case == 0)
14047 return false;
14049 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14051 /* If the same register is used more than once, try to find a free
14052 register. */
14053 CLEAR_HARD_REG_SET (allocated);
14054 for (i = 0; i < nops; i++)
14056 for (j = i + 1; j < nops; j++)
14057 if (regs[i] == regs[j])
14059 rtx t = peep2_find_free_register (0, nops * 2,
14060 TARGET_THUMB1 ? "l" : "r",
14061 SImode, &allocated);
14062 if (t == NULL_RTX)
14063 return false;
14064 reg_rtxs[i] = t;
14065 regs[i] = REGNO (t);
14069 /* Compute an ordering that maps the register numbers to an ascending
14070 sequence. */
14071 reg_order[0] = 0;
14072 for (i = 0; i < nops; i++)
14073 if (regs[i] < regs[reg_order[0]])
14074 reg_order[0] = i;
14076 for (i = 1; i < nops; i++)
14078 int this_order = reg_order[i - 1];
14079 for (j = 0; j < nops; j++)
14080 if (regs[j] > regs[reg_order[i - 1]]
14081 && (this_order == reg_order[i - 1]
14082 || regs[j] < regs[this_order]))
14083 this_order = j;
14084 reg_order[i] = this_order;
14087 /* Ensure that registers that must be live after the instruction end
14088 up with the correct value. */
14089 for (i = 0; i < nops; i++)
14091 int this_order = reg_order[i];
14092 if ((this_order != mem_order[i]
14093 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14094 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14095 return false;
14098 /* Load the constants. */
14099 for (i = 0; i < nops; i++)
14101 rtx op = operands[2 * nops + mem_order[i]];
14102 sorted_regs[i] = regs[reg_order[i]];
14103 emit_move_insn (reg_rtxs[reg_order[i]], op);
14106 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14108 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14109 if (TARGET_THUMB1)
14111 gcc_assert (base_reg_dies);
14112 write_back = TRUE;
14115 if (stm_case == 5)
14117 gcc_assert (base_reg_dies);
14118 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14119 offset = 0;
14122 addr = plus_constant (Pmode, base_reg_rtx, offset);
14124 for (i = 0; i < nops; i++)
14126 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14127 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14128 SImode, addr, 0);
14130 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14131 write_back ? offset + i * 4 : 0));
14132 return true;
14135 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14136 unaligned copies on processors which support unaligned semantics for those
14137 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14138 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14139 An interleave factor of 1 (the minimum) will perform no interleaving.
14140 Load/store multiple are used for aligned addresses where possible. */
14142 static void
14143 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14144 HOST_WIDE_INT length,
14145 unsigned int interleave_factor)
14147 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14148 int *regnos = XALLOCAVEC (int, interleave_factor);
14149 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14150 HOST_WIDE_INT i, j;
14151 HOST_WIDE_INT remaining = length, words;
14152 rtx halfword_tmp = NULL, byte_tmp = NULL;
14153 rtx dst, src;
14154 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14155 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14156 HOST_WIDE_INT srcoffset, dstoffset;
14157 HOST_WIDE_INT src_autoinc, dst_autoinc;
14158 rtx mem, addr;
14160 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14162 /* Use hard registers if we have aligned source or destination so we can use
14163 load/store multiple with contiguous registers. */
14164 if (dst_aligned || src_aligned)
14165 for (i = 0; i < interleave_factor; i++)
14166 regs[i] = gen_rtx_REG (SImode, i);
14167 else
14168 for (i = 0; i < interleave_factor; i++)
14169 regs[i] = gen_reg_rtx (SImode);
14171 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14172 src = copy_addr_to_reg (XEXP (srcbase, 0));
14174 srcoffset = dstoffset = 0;
14176 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14177 For copying the last bytes we want to subtract this offset again. */
14178 src_autoinc = dst_autoinc = 0;
14180 for (i = 0; i < interleave_factor; i++)
14181 regnos[i] = i;
14183 /* Copy BLOCK_SIZE_BYTES chunks. */
14185 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14187 /* Load words. */
14188 if (src_aligned && interleave_factor > 1)
14190 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14191 TRUE, srcbase, &srcoffset));
14192 src_autoinc += UNITS_PER_WORD * interleave_factor;
14194 else
14196 for (j = 0; j < interleave_factor; j++)
14198 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14199 - src_autoinc));
14200 mem = adjust_automodify_address (srcbase, SImode, addr,
14201 srcoffset + j * UNITS_PER_WORD);
14202 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14204 srcoffset += block_size_bytes;
14207 /* Store words. */
14208 if (dst_aligned && interleave_factor > 1)
14210 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14211 TRUE, dstbase, &dstoffset));
14212 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14214 else
14216 for (j = 0; j < interleave_factor; j++)
14218 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14219 - dst_autoinc));
14220 mem = adjust_automodify_address (dstbase, SImode, addr,
14221 dstoffset + j * UNITS_PER_WORD);
14222 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14224 dstoffset += block_size_bytes;
14227 remaining -= block_size_bytes;
14230 /* Copy any whole words left (note these aren't interleaved with any
14231 subsequent halfword/byte load/stores in the interests of simplicity). */
14233 words = remaining / UNITS_PER_WORD;
14235 gcc_assert (words < interleave_factor);
14237 if (src_aligned && words > 1)
14239 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14240 &srcoffset));
14241 src_autoinc += UNITS_PER_WORD * words;
14243 else
14245 for (j = 0; j < words; j++)
14247 addr = plus_constant (Pmode, src,
14248 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14249 mem = adjust_automodify_address (srcbase, SImode, addr,
14250 srcoffset + j * UNITS_PER_WORD);
14251 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14253 srcoffset += words * UNITS_PER_WORD;
14256 if (dst_aligned && words > 1)
14258 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14259 &dstoffset));
14260 dst_autoinc += words * UNITS_PER_WORD;
14262 else
14264 for (j = 0; j < words; j++)
14266 addr = plus_constant (Pmode, dst,
14267 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14268 mem = adjust_automodify_address (dstbase, SImode, addr,
14269 dstoffset + j * UNITS_PER_WORD);
14270 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14272 dstoffset += words * UNITS_PER_WORD;
14275 remaining -= words * UNITS_PER_WORD;
14277 gcc_assert (remaining < 4);
14279 /* Copy a halfword if necessary. */
14281 if (remaining >= 2)
14283 halfword_tmp = gen_reg_rtx (SImode);
14285 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14286 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14287 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14289 /* Either write out immediately, or delay until we've loaded the last
14290 byte, depending on interleave factor. */
14291 if (interleave_factor == 1)
14293 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14294 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14295 emit_insn (gen_unaligned_storehi (mem,
14296 gen_lowpart (HImode, halfword_tmp)));
14297 halfword_tmp = NULL;
14298 dstoffset += 2;
14301 remaining -= 2;
14302 srcoffset += 2;
14305 gcc_assert (remaining < 2);
14307 /* Copy last byte. */
14309 if ((remaining & 1) != 0)
14311 byte_tmp = gen_reg_rtx (SImode);
14313 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14314 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14315 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14317 if (interleave_factor == 1)
14319 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14320 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14321 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14322 byte_tmp = NULL;
14323 dstoffset++;
14326 remaining--;
14327 srcoffset++;
14330 /* Store last halfword if we haven't done so already. */
14332 if (halfword_tmp)
14334 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14335 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14336 emit_insn (gen_unaligned_storehi (mem,
14337 gen_lowpart (HImode, halfword_tmp)));
14338 dstoffset += 2;
14341 /* Likewise for last byte. */
14343 if (byte_tmp)
14345 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14346 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14347 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14348 dstoffset++;
14351 gcc_assert (remaining == 0 && srcoffset == dstoffset);
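/* How the straight-line copy above decomposes a LENGTH-byte copy, as a
   stand-alone sketch (hypothetical helper): whole blocks of
   INTERLEAVE_FACTOR words, then leftover words, then at most one halfword
   and one byte.  */
static void
straight_copy_plan_sketch (HOST_WIDE_INT length, unsigned int interleave_factor,
                           HOST_WIDE_INT *blocks, HOST_WIDE_INT *words,
                           HOST_WIDE_INT *halfwords, HOST_WIDE_INT *bytes)
{
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;

  *blocks = length / block_size_bytes;
  length -= *blocks * block_size_bytes;
  *words = length / UNITS_PER_WORD;     /* Always < INTERLEAVE_FACTOR.  */
  length -= *words * UNITS_PER_WORD;
  *halfwords = length / 2;              /* 0 or 1.  */
  *bytes = length & 1;                  /* 0 or 1.  */
}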
14354 /* From mips_adjust_block_mem:
14356 Helper function for doing a loop-based block operation on memory
14357 reference MEM. Each iteration of the loop will operate on LENGTH
14358 bytes of MEM.
14360 Create a new base register for use within the loop and point it to
14361 the start of MEM. Create a new memory reference that uses this
14362 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14364 static void
14365 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14366 rtx *loop_mem)
14368 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14370 /* Although the new mem does not refer to a known location,
14371 it does keep up to LENGTH bytes of alignment. */
14372 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14373 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14376 /* From mips_block_move_loop:
14378 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14379 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14380 the memory regions do not overlap. */
14382 static void
14383 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14384 unsigned int interleave_factor,
14385 HOST_WIDE_INT bytes_per_iter)
14387 rtx label, src_reg, dest_reg, final_src, test;
14388 HOST_WIDE_INT leftover;
14390 leftover = length % bytes_per_iter;
14391 length -= leftover;
14393 /* Create registers and memory references for use within the loop. */
14394 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14395 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14397 /* Calculate the value that SRC_REG should have after the last iteration of
14398 the loop. */
14399 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14400 0, 0, OPTAB_WIDEN);
14402 /* Emit the start of the loop. */
14403 label = gen_label_rtx ();
14404 emit_label (label);
14406 /* Emit the loop body. */
14407 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14408 interleave_factor);
14410 /* Move on to the next block. */
14411 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14412 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14414 /* Emit the loop condition. */
14415 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14416 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14418 /* Mop up any left-over bytes. */
14419 if (leftover)
14420 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14423 /* Emit a block move when either the source or destination is unaligned (not
14424 aligned to a four-byte boundary). This may need further tuning depending on
14425 core type, optimize_size setting, etc. */
14427 static int
14428 arm_movmemqi_unaligned (rtx *operands)
14430 HOST_WIDE_INT length = INTVAL (operands[2]);
14432 if (optimize_size)
14434 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14435 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14436 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14437 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14438 or dst_aligned though: allow more interleaving in those cases since the
14439 resulting code can be smaller. */
14440 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14441 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14443 if (length > 12)
14444 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14445 interleave_factor, bytes_per_iter);
14446 else
14447 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14448 interleave_factor);
14450 else
14452 /* Note that the loop created by arm_block_move_unaligned_loop may be
14453 subject to loop unrolling, which makes tuning this condition a little
14454 redundant. */
14455 if (length > 32)
14456 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14457 else
14458 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14461 return 1;
14464 int
14465 arm_gen_movmemqi (rtx *operands)
14467 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14468 HOST_WIDE_INT srcoffset, dstoffset;
14469 int i;
14470 rtx src, dst, srcbase, dstbase;
14471 rtx part_bytes_reg = NULL;
14472 rtx mem;
14474 if (!CONST_INT_P (operands[2])
14475 || !CONST_INT_P (operands[3])
14476 || INTVAL (operands[2]) > 64)
14477 return 0;
14479 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14480 return arm_movmemqi_unaligned (operands);
14482 if (INTVAL (operands[3]) & 3)
14483 return 0;
14485 dstbase = operands[0];
14486 srcbase = operands[1];
14488 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14489 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14491 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14492 out_words_to_go = INTVAL (operands[2]) / 4;
14493 last_bytes = INTVAL (operands[2]) & 3;
14494 dstoffset = srcoffset = 0;
14496 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14497 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14499 for (i = 0; in_words_to_go >= 2; i+=4)
14501 if (in_words_to_go > 4)
14502 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14503 TRUE, srcbase, &srcoffset));
14504 else
14505 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14506 src, FALSE, srcbase,
14507 &srcoffset));
14509 if (out_words_to_go)
14511 if (out_words_to_go > 4)
14512 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14513 TRUE, dstbase, &dstoffset));
14514 else if (out_words_to_go != 1)
14515 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14516 out_words_to_go, dst,
14517 (last_bytes == 0
14518 ? FALSE : TRUE),
14519 dstbase, &dstoffset));
14520 else
14522 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14523 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14524 if (last_bytes != 0)
14526 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14527 dstoffset += 4;
14532 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14533 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14536 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14537 if (out_words_to_go)
14539 rtx sreg;
14541 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14542 sreg = copy_to_reg (mem);
14544 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14545 emit_move_insn (mem, sreg);
14546 in_words_to_go--;
14548 gcc_assert (!in_words_to_go); /* Sanity check */
14551 if (in_words_to_go)
14553 gcc_assert (in_words_to_go > 0);
14555 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14556 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14559 gcc_assert (!last_bytes || part_bytes_reg);
14561 if (BYTES_BIG_ENDIAN && last_bytes)
14563 rtx tmp = gen_reg_rtx (SImode);
14565 /* The bytes we want are in the top end of the word. */
14566 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14567 GEN_INT (8 * (4 - last_bytes))));
14568 part_bytes_reg = tmp;
14570 while (last_bytes)
14572 mem = adjust_automodify_address (dstbase, QImode,
14573 plus_constant (Pmode, dst,
14574 last_bytes - 1),
14575 dstoffset + last_bytes - 1);
14576 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14578 if (--last_bytes)
14580 tmp = gen_reg_rtx (SImode);
14581 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14582 part_bytes_reg = tmp;
14587 else
14589 if (last_bytes > 1)
14591 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14592 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14593 last_bytes -= 2;
14594 if (last_bytes)
14596 rtx tmp = gen_reg_rtx (SImode);
14597 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14598 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14599 part_bytes_reg = tmp;
14600 dstoffset += 2;
14604 if (last_bytes)
14606 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14607 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14611 return 1;
14614 /* Helper for gen_movmem_ldrd_strd. Return a memory reference like MEM but
14615 with its address advanced by the size of MEM's mode. */
14616 inline static rtx
14617 next_consecutive_mem (rtx mem)
14619 enum machine_mode mode = GET_MODE (mem);
14620 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14621 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14623 return adjust_automodify_address (mem, mode, addr, offset);
14626 /* Copy using LDRD/STRD instructions whenever possible.
14627 Returns true upon success. */
14628 bool
14629 gen_movmem_ldrd_strd (rtx *operands)
14631 unsigned HOST_WIDE_INT len;
14632 HOST_WIDE_INT align;
14633 rtx src, dst, base;
14634 rtx reg0;
14635 bool src_aligned, dst_aligned;
14636 bool src_volatile, dst_volatile;
14638 gcc_assert (CONST_INT_P (operands[2]));
14639 gcc_assert (CONST_INT_P (operands[3]));
14641 len = UINTVAL (operands[2]);
14642 if (len > 64)
14643 return false;
14645 /* Maximum alignment we can assume for both src and dst buffers. */
14646 align = INTVAL (operands[3]);
14648 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14649 return false;
14651 /* Place src and dst addresses in registers
14652 and update the corresponding mem rtx. */
14653 dst = operands[0];
14654 dst_volatile = MEM_VOLATILE_P (dst);
14655 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14656 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14657 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14659 src = operands[1];
14660 src_volatile = MEM_VOLATILE_P (src);
14661 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14662 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14663 src = adjust_automodify_address (src, VOIDmode, base, 0);
14665 if (!unaligned_access && !(src_aligned && dst_aligned))
14666 return false;
14668 if (src_volatile || dst_volatile)
14669 return false;
14671 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14672 if (!(dst_aligned || src_aligned))
14673 return arm_gen_movmemqi (operands);
14675 src = adjust_address (src, DImode, 0);
14676 dst = adjust_address (dst, DImode, 0);
14677 while (len >= 8)
14679 len -= 8;
14680 reg0 = gen_reg_rtx (DImode);
14681 if (src_aligned)
14682 emit_move_insn (reg0, src);
14683 else
14684 emit_insn (gen_unaligned_loaddi (reg0, src));
14686 if (dst_aligned)
14687 emit_move_insn (dst, reg0);
14688 else
14689 emit_insn (gen_unaligned_storedi (dst, reg0));
14691 src = next_consecutive_mem (src);
14692 dst = next_consecutive_mem (dst);
14695 gcc_assert (len < 8);
14696 if (len >= 4)
14698 /* More than a word but less than a double-word to copy. Copy a word. */
14699 reg0 = gen_reg_rtx (SImode);
14700 src = adjust_address (src, SImode, 0);
14701 dst = adjust_address (dst, SImode, 0);
14702 if (src_aligned)
14703 emit_move_insn (reg0, src);
14704 else
14705 emit_insn (gen_unaligned_loadsi (reg0, src));
14707 if (dst_aligned)
14708 emit_move_insn (dst, reg0);
14709 else
14710 emit_insn (gen_unaligned_storesi (dst, reg0));
14712 src = next_consecutive_mem (src);
14713 dst = next_consecutive_mem (dst);
14714 len -= 4;
14717 if (len == 0)
14718 return true;
14720 /* Copy the remaining bytes. */
14721 if (len >= 2)
14723 dst = adjust_address (dst, HImode, 0);
14724 src = adjust_address (src, HImode, 0);
14725 reg0 = gen_reg_rtx (SImode);
14726 if (src_aligned)
14727 emit_insn (gen_zero_extendhisi2 (reg0, src));
14728 else
14729 emit_insn (gen_unaligned_loadhiu (reg0, src));
14731 if (dst_aligned)
14732 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14733 else
14734 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14736 src = next_consecutive_mem (src);
14737 dst = next_consecutive_mem (dst);
14738 if (len == 2)
14739 return true;
14742 dst = adjust_address (dst, QImode, 0);
14743 src = adjust_address (src, QImode, 0);
14744 reg0 = gen_reg_rtx (QImode);
14745 emit_move_insn (reg0, src);
14746 emit_move_insn (dst, reg0);
14747 return true;
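/* A minimal usage sketch for the routine above, assuming the caller (normally
   the movmem expander) has already built the two MEM operands; the byte count
   and alignment values below are illustrative only.  */
static bool
try_ldrd_strd_copy_sketch (rtx dst_mem, rtx src_mem)
{
  rtx ops[4];

  ops[0] = dst_mem;        /* Destination block.  */
  ops[1] = src_mem;        /* Source block.  */
  ops[2] = GEN_INT (24);   /* Number of bytes to copy (must be <= 64).  */
  ops[3] = GEN_INT (8);    /* Known alignment, in bytes, of both buffers.  */

  return gen_movmem_ldrd_strd (ops);
}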
14750 /* Select a dominance comparison mode if possible for a test of the general
14751 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14752 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14753 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14754 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14755 In all cases OP will be either EQ or NE, but we don't need to know which
14756 here. If we are unable to support a dominance comparison we return
14757 CC mode. This will then fail to match for the RTL expressions that
14758 generate this call. */
14759 enum machine_mode
14760 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14762 enum rtx_code cond1, cond2;
14763 int swapped = 0;
14765 /* Currently we will probably get the wrong result if the individual
14766 comparisons are not simple. This also ensures that it is safe to
14767 reverse a comparison if necessary. */
14768 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14769 != CCmode)
14770 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14771 != CCmode))
14772 return CCmode;
14774 /* The if_then_else variant of this tests the second condition if the
14775 first passes, but is true if the first fails. Reverse the first
14776 condition to get a true "inclusive-or" expression. */
14777 if (cond_or == DOM_CC_NX_OR_Y)
14778 cond1 = reverse_condition (cond1);
14780 /* If the comparisons are not equal, and one doesn't dominate the other,
14781 then we can't do this. */
14782 if (cond1 != cond2
14783 && !comparison_dominates_p (cond1, cond2)
14784 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14785 return CCmode;
14787 if (swapped)
14789 enum rtx_code temp = cond1;
14790 cond1 = cond2;
14791 cond2 = temp;
14794 switch (cond1)
14796 case EQ:
14797 if (cond_or == DOM_CC_X_AND_Y)
14798 return CC_DEQmode;
14800 switch (cond2)
14802 case EQ: return CC_DEQmode;
14803 case LE: return CC_DLEmode;
14804 case LEU: return CC_DLEUmode;
14805 case GE: return CC_DGEmode;
14806 case GEU: return CC_DGEUmode;
14807 default: gcc_unreachable ();
14810 case LT:
14811 if (cond_or == DOM_CC_X_AND_Y)
14812 return CC_DLTmode;
14814 switch (cond2)
14816 case LT:
14817 return CC_DLTmode;
14818 case LE:
14819 return CC_DLEmode;
14820 case NE:
14821 return CC_DNEmode;
14822 default:
14823 gcc_unreachable ();
14826 case GT:
14827 if (cond_or == DOM_CC_X_AND_Y)
14828 return CC_DGTmode;
14830 switch (cond2)
14832 case GT:
14833 return CC_DGTmode;
14834 case GE:
14835 return CC_DGEmode;
14836 case NE:
14837 return CC_DNEmode;
14838 default:
14839 gcc_unreachable ();
14842 case LTU:
14843 if (cond_or == DOM_CC_X_AND_Y)
14844 return CC_DLTUmode;
14846 switch (cond2)
14848 case LTU:
14849 return CC_DLTUmode;
14850 case LEU:
14851 return CC_DLEUmode;
14852 case NE:
14853 return CC_DNEmode;
14854 default:
14855 gcc_unreachable ();
14858 case GTU:
14859 if (cond_or == DOM_CC_X_AND_Y)
14860 return CC_DGTUmode;
14862 switch (cond2)
14864 case GTU:
14865 return CC_DGTUmode;
14866 case GEU:
14867 return CC_DGEUmode;
14868 case NE:
14869 return CC_DNEmode;
14870 default:
14871 gcc_unreachable ();
14874 /* The remaining cases only occur when both comparisons are the
14875 same. */
14876 case NE:
14877 gcc_assert (cond1 == cond2);
14878 return CC_DNEmode;
14880 case LE:
14881 gcc_assert (cond1 == cond2);
14882 return CC_DLEmode;
14884 case GE:
14885 gcc_assert (cond1 == cond2);
14886 return CC_DGEmode;
14888 case LEU:
14889 gcc_assert (cond1 == cond2);
14890 return CC_DLEUmode;
14892 case GEU:
14893 gcc_assert (cond1 == cond2);
14894 return CC_DGEUmode;
14896 default:
14897 gcc_unreachable ();
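/* A concrete illustration (hypothetical helper; R0..R3 are assumed to be
   SImode registers): for (LT r0 r1) || (LE r2 r3) the routine above returns
   CC_DLEmode, since an LE test dominates an LT test.  */
static enum machine_mode
dominance_mode_example_sketch (rtx r0, rtx r1, rtx r2, rtx r3)
{
  rtx x = gen_rtx_LT (SImode, r0, r1);
  rtx y = gen_rtx_LE (SImode, r2, r3);

  return arm_select_dominance_cc_mode (x, y, DOM_CC_X_OR_Y);  /* CC_DLEmode.  */
}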
14901 enum machine_mode
14902 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14904 /* All floating point compares return CCFP if it is an equality
14905 comparison, and CCFPE otherwise. */
14906 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14908 switch (op)
14910 case EQ:
14911 case NE:
14912 case UNORDERED:
14913 case ORDERED:
14914 case UNLT:
14915 case UNLE:
14916 case UNGT:
14917 case UNGE:
14918 case UNEQ:
14919 case LTGT:
14920 return CCFPmode;
14922 case LT:
14923 case LE:
14924 case GT:
14925 case GE:
14926 return CCFPEmode;
14928 default:
14929 gcc_unreachable ();
14933 /* A compare with a shifted operand. Because of canonicalization, the
14934 comparison will have to be swapped when we emit the assembler. */
14935 if (GET_MODE (y) == SImode
14936 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14937 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14938 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14939 || GET_CODE (x) == ROTATERT))
14940 return CC_SWPmode;
14942 /* This operation is performed swapped, but since we only rely on the Z
14943 flag we don't need an additional mode. */
14944 if (GET_MODE (y) == SImode
14945 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14946 && GET_CODE (x) == NEG
14947 && (op == EQ || op == NE))
14948 return CC_Zmode;
14950 /* This is a special case that is used by combine to allow a
14951 comparison of a shifted byte load to be split into a zero-extend
14952 followed by a comparison of the shifted integer (only valid for
14953 equalities and unsigned inequalities). */
14954 if (GET_MODE (x) == SImode
14955 && GET_CODE (x) == ASHIFT
14956 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14957 && GET_CODE (XEXP (x, 0)) == SUBREG
14958 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14959 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14960 && (op == EQ || op == NE
14961 || op == GEU || op == GTU || op == LTU || op == LEU)
14962 && CONST_INT_P (y))
14963 return CC_Zmode;
14965 /* A construct for a conditional compare: if the false arm contains
14966 0, then both conditions must be true; otherwise either condition
14967 must be true. Not all conditions are possible, so CCmode is
14968 returned if it can't be done. */
14969 if (GET_CODE (x) == IF_THEN_ELSE
14970 && (XEXP (x, 2) == const0_rtx
14971 || XEXP (x, 2) == const1_rtx)
14972 && COMPARISON_P (XEXP (x, 0))
14973 && COMPARISON_P (XEXP (x, 1)))
14974 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14975 INTVAL (XEXP (x, 2)));
14977 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14978 if (GET_CODE (x) == AND
14979 && (op == EQ || op == NE)
14980 && COMPARISON_P (XEXP (x, 0))
14981 && COMPARISON_P (XEXP (x, 1)))
14982 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14983 DOM_CC_X_AND_Y);
14985 if (GET_CODE (x) == IOR
14986 && (op == EQ || op == NE)
14987 && COMPARISON_P (XEXP (x, 0))
14988 && COMPARISON_P (XEXP (x, 1)))
14989 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14990 DOM_CC_X_OR_Y);
14992 /* An operation (on Thumb) where we want to test for a single bit.
14993 This is done by shifting that bit up into the top bit of a
14994 scratch register; we can then branch on the sign bit. */
14995 if (TARGET_THUMB1
14996 && GET_MODE (x) == SImode
14997 && (op == EQ || op == NE)
14998 && GET_CODE (x) == ZERO_EXTRACT
14999 && XEXP (x, 1) == const1_rtx)
15000 return CC_Nmode;
15002 /* For an operation that sets the condition codes as a side-effect, the
15003 V flag is not set correctly, so we can only use comparisons where
15004 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15005 instead.) */
15006 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15007 if (GET_MODE (x) == SImode
15008 && y == const0_rtx
15009 && (op == EQ || op == NE || op == LT || op == GE)
15010 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15011 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15012 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15013 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15014 || GET_CODE (x) == LSHIFTRT
15015 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15016 || GET_CODE (x) == ROTATERT
15017 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15018 return CC_NOOVmode;
15020 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15021 return CC_Zmode;
15023 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15024 && GET_CODE (x) == PLUS
15025 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15026 return CC_Cmode;
15028 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15030 switch (op)
15032 case EQ:
15033 case NE:
15034 /* A DImode comparison against zero can be implemented by
15035 or'ing the two halves together. */
15036 if (y == const0_rtx)
15037 return CC_Zmode;
15039 /* We can do an equality test in three Thumb instructions. */
15040 if (!TARGET_32BIT)
15041 return CC_Zmode;
15043 /* FALLTHROUGH */
15045 case LTU:
15046 case LEU:
15047 case GTU:
15048 case GEU:
15049 /* DImode unsigned comparisons can be implemented by cmp +
15050 cmpeq without a scratch register. Not worth doing in
15051 Thumb-2. */
15052 if (TARGET_32BIT)
15053 return CC_CZmode;
15055 /* FALLTHROUGH */
15057 case LT:
15058 case LE:
15059 case GT:
15060 case GE:
15061 /* DImode signed and unsigned comparisons can be implemented
15062 by cmp + sbcs with a scratch register, but that does not
15063 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15064 gcc_assert (op != EQ && op != NE);
15065 return CC_NCVmode;
15067 default:
15068 gcc_unreachable ();
15072 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15073 return GET_MODE (x);
15075 return CCmode;
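/* Usage sketch (hypothetical helper; A and B are assumed to be SImode
   registers): the classic unsigned overflow test "a + b < b" compares the
   sum against one of the addends, for which the routine above selects
   CC_Cmode, since only the carry flag is meaningful.  */
static enum machine_mode
unsigned_overflow_cc_mode_sketch (rtx a, rtx b)
{
  rtx sum = gen_rtx_PLUS (SImode, a, b);

  return arm_select_cc_mode (LTU, sum, b);  /* CC_Cmode.  */
}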
15078 /* X and Y are two things to compare using CODE. Emit the compare insn and
15079 return the rtx for the CC register in the proper mode. SCRATCH may be used
15080 as an SImode scratch register when a DImode comparison needs one. */
15081 rtx
15082 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15084 enum machine_mode mode;
15085 rtx cc_reg;
15086 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15088 /* We might have X as a constant, Y as a register because of the predicates
15089 used for cmpdi. If so, force X to a register here. */
15090 if (dimode_comparison && !REG_P (x))
15091 x = force_reg (DImode, x);
15093 mode = SELECT_CC_MODE (code, x, y);
15094 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15096 if (dimode_comparison
15097 && mode != CC_CZmode)
15099 rtx clobber, set;
15101 /* To compare two non-zero values for equality, XOR them and
15102 then compare against zero. Not used for ARM mode; there
15103 CC_CZmode is cheaper. */
15104 if (mode == CC_Zmode && y != const0_rtx)
15106 gcc_assert (!reload_completed);
15107 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15108 y = const0_rtx;
15111 /* A scratch register is required. */
15112 if (reload_completed)
15113 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15114 else
15115 scratch = gen_rtx_SCRATCH (SImode);
15117 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15118 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15119 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15121 else
15122 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15124 return cc_reg;
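/* A minimal usage sketch (hypothetical helper): for a plain SImode comparison
   no scratch register is needed, and the returned CC register can then be
   tested by a conditional branch or conditional move.  */
static rtx
simple_compare_sketch (rtx a, rtx b)
{
  return arm_gen_compare_reg (GT, a, b, NULL_RTX);
}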
15127 /* Generate a sequence of insns that will generate the correct return
15128 address mask depending on the physical architecture that the program
15129 is running on. */
15130 rtx
15131 arm_gen_return_addr_mask (void)
15133 rtx reg = gen_reg_rtx (Pmode);
15135 emit_insn (gen_return_addr_mask (reg));
15136 return reg;
15139 void
15140 arm_reload_in_hi (rtx *operands)
15142 rtx ref = operands[1];
15143 rtx base, scratch;
15144 HOST_WIDE_INT offset = 0;
15146 if (GET_CODE (ref) == SUBREG)
15148 offset = SUBREG_BYTE (ref);
15149 ref = SUBREG_REG (ref);
15152 if (REG_P (ref))
15154 /* We have a pseudo which has been spilt onto the stack; there
15155 are two cases here: the first where there is a simple
15156 stack-slot replacement and a second where the stack-slot is
15157 out of range, or is used as a subreg. */
15158 if (reg_equiv_mem (REGNO (ref)))
15160 ref = reg_equiv_mem (REGNO (ref));
15161 base = find_replacement (&XEXP (ref, 0));
15163 else
15164 /* The slot is out of range, or was dressed up in a SUBREG. */
15165 base = reg_equiv_address (REGNO (ref));
15167 else
15168 base = find_replacement (&XEXP (ref, 0));
15170 /* Handle the case where the address is too complex to be offset by 1. */
15171 if (GET_CODE (base) == MINUS
15172 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15174 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15176 emit_set_insn (base_plus, base);
15177 base = base_plus;
15179 else if (GET_CODE (base) == PLUS)
15181 /* The addend must be CONST_INT, or we would have dealt with it above. */
15182 HOST_WIDE_INT hi, lo;
15184 offset += INTVAL (XEXP (base, 1));
15185 base = XEXP (base, 0);
15187 /* Rework the address into a legal sequence of insns. */
15188 /* Valid range for lo is -4095 -> 4095 */
15189 lo = (offset >= 0
15190 ? (offset & 0xfff)
15191 : -((-offset) & 0xfff));
15193 /* Corner case: if lo is the max offset then we would be out of range
15194 once we have added the additional 1 below, so bump the msb into the
15195 pre-loading insn(s). */
15196 if (lo == 4095)
15197 lo &= 0x7ff;
15199 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15200 ^ (HOST_WIDE_INT) 0x80000000)
15201 - (HOST_WIDE_INT) 0x80000000);
15203 gcc_assert (hi + lo == offset);
15205 if (hi != 0)
15207 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15209 /* Get the base address; addsi3 knows how to handle constants
15210 that require more than one insn. */
15211 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15212 base = base_plus;
15213 offset = lo;
15217 /* Operands[2] may overlap operands[0] (though it won't overlap
15218 operands[1]); that's why we asked for a DImode reg -- so we can
15219 use the half that does not overlap. */
15220 if (REGNO (operands[2]) == REGNO (operands[0]))
15221 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15222 else
15223 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15225 emit_insn (gen_zero_extendqisi2 (scratch,
15226 gen_rtx_MEM (QImode,
15227 plus_constant (Pmode, base,
15228 offset))));
15229 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15230 gen_rtx_MEM (QImode,
15231 plus_constant (Pmode, base,
15232 offset + 1))));
15233 if (!BYTES_BIG_ENDIAN)
15234 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15235 gen_rtx_IOR (SImode,
15236 gen_rtx_ASHIFT
15237 (SImode,
15238 gen_rtx_SUBREG (SImode, operands[0], 0),
15239 GEN_INT (8)),
15240 scratch));
15241 else
15242 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15243 gen_rtx_IOR (SImode,
15244 gen_rtx_ASHIFT (SImode, scratch,
15245 GEN_INT (8)),
15246 gen_rtx_SUBREG (SImode, operands[0], 0)));
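/* The offset-splitting trick above (reused in arm_reload_out_hi below) as a
   stand-alone sketch: LO keeps the low 12 bits with the original sign, HI is
   the sign-extended remainder, so HI + LO == OFFSET and LO stays within the
   -4095..4095 byte-access range.  Hypothetical helper; the callers
   additionally trim LO when it is exactly 4095 so that OFFSET + 1 remains
   reachable.  */
static HOST_WIDE_INT
split_reload_offset_sketch (HOST_WIDE_INT offset, HOST_WIDE_INT *lo_out)
{
  HOST_WIDE_INT lo = (offset >= 0
                      ? (offset & 0xfff)
                      : -((-offset) & 0xfff));
  HOST_WIDE_INT hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
                       ^ (HOST_WIDE_INT) 0x80000000)
                      - (HOST_WIDE_INT) 0x80000000);

  *lo_out = lo;
  return hi;  /* E.g. offset 0x1234 splits into hi 0x1000, lo 0x234.  */
}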
15249 /* Handle storing a half-word to memory during reload by synthesizing it as two
15250 byte stores. Take care not to clobber the input values until after we
15251 have moved them somewhere safe. This code assumes that if the DImode
15252 scratch in operands[2] overlaps either the input value or output address
15253 in some way, then that value must die in this insn (we absolutely need
15254 two scratch registers for some corner cases). */
15255 void
15256 arm_reload_out_hi (rtx *operands)
15258 rtx ref = operands[0];
15259 rtx outval = operands[1];
15260 rtx base, scratch;
15261 HOST_WIDE_INT offset = 0;
15263 if (GET_CODE (ref) == SUBREG)
15265 offset = SUBREG_BYTE (ref);
15266 ref = SUBREG_REG (ref);
15269 if (REG_P (ref))
15271 /* We have a pseudo which has been spilt onto the stack; there
15272 are two cases here: the first where there is a simple
15273 stack-slot replacement and a second where the stack-slot is
15274 out of range, or is used as a subreg. */
15275 if (reg_equiv_mem (REGNO (ref)))
15277 ref = reg_equiv_mem (REGNO (ref));
15278 base = find_replacement (&XEXP (ref, 0));
15280 else
15281 /* The slot is out of range, or was dressed up in a SUBREG. */
15282 base = reg_equiv_address (REGNO (ref));
15284 else
15285 base = find_replacement (&XEXP (ref, 0));
15287 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15289 /* Handle the case where the address is too complex to be offset by 1. */
15290 if (GET_CODE (base) == MINUS
15291 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15293 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15295 /* Be careful not to destroy OUTVAL. */
15296 if (reg_overlap_mentioned_p (base_plus, outval))
15298 /* Updating base_plus might destroy outval, see if we can
15299 swap the scratch and base_plus. */
15300 if (!reg_overlap_mentioned_p (scratch, outval))
15302 rtx tmp = scratch;
15303 scratch = base_plus;
15304 base_plus = tmp;
15306 else
15308 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15310 /* Be conservative and copy OUTVAL into the scratch now,
15311 this should only be necessary if outval is a subreg
15312 of something larger than a word. */
15313 /* XXX Might this clobber base? I can't see how it can,
15314 since scratch is known to overlap with OUTVAL, and
15315 must be wider than a word. */
15316 emit_insn (gen_movhi (scratch_hi, outval));
15317 outval = scratch_hi;
15321 emit_set_insn (base_plus, base);
15322 base = base_plus;
15324 else if (GET_CODE (base) == PLUS)
15326 /* The addend must be CONST_INT, or we would have dealt with it above. */
15327 HOST_WIDE_INT hi, lo;
15329 offset += INTVAL (XEXP (base, 1));
15330 base = XEXP (base, 0);
15332 /* Rework the address into a legal sequence of insns. */
15333 /* Valid range for lo is -4095 -> 4095 */
15334 lo = (offset >= 0
15335 ? (offset & 0xfff)
15336 : -((-offset) & 0xfff));
15338 /* Corner case: if lo is the max offset then we would be out of range
15339 once we have added the additional 1 below, so bump the msb into the
15340 pre-loading insn(s). */
15341 if (lo == 4095)
15342 lo &= 0x7ff;
15344 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15345 ^ (HOST_WIDE_INT) 0x80000000)
15346 - (HOST_WIDE_INT) 0x80000000);
15348 gcc_assert (hi + lo == offset);
15350 if (hi != 0)
15352 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15354 /* Be careful not to destroy OUTVAL. */
15355 if (reg_overlap_mentioned_p (base_plus, outval))
15357 /* Updating base_plus might destroy outval, see if we
15358 can swap the scratch and base_plus. */
15359 if (!reg_overlap_mentioned_p (scratch, outval))
15361 rtx tmp = scratch;
15362 scratch = base_plus;
15363 base_plus = tmp;
15365 else
15367 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15369 /* Be conservative and copy outval into scratch now,
15370 this should only be necessary if outval is a
15371 subreg of something larger than a word. */
15372 /* XXX Might this clobber base? I can't see how it
15373 can, since scratch is known to overlap with
15374 outval. */
15375 emit_insn (gen_movhi (scratch_hi, outval));
15376 outval = scratch_hi;
15380 /* Get the base address; addsi3 knows how to handle constants
15381 that require more than one insn. */
15382 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15383 base = base_plus;
15384 offset = lo;
15388 if (BYTES_BIG_ENDIAN)
15390 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15391 plus_constant (Pmode, base,
15392 offset + 1)),
15393 gen_lowpart (QImode, outval)));
15394 emit_insn (gen_lshrsi3 (scratch,
15395 gen_rtx_SUBREG (SImode, outval, 0),
15396 GEN_INT (8)));
15397 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15398 offset)),
15399 gen_lowpart (QImode, scratch)));
15401 else
15403 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15404 offset)),
15405 gen_lowpart (QImode, outval)));
15406 emit_insn (gen_lshrsi3 (scratch,
15407 gen_rtx_SUBREG (SImode, outval, 0),
15408 GEN_INT (8)));
15409 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15410 plus_constant (Pmode, base,
15411 offset + 1)),
15412 gen_lowpart (QImode, scratch)));
15416 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15417 (padded to the size of a word) should be passed in a register. */
15419 static bool
15420 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15422 if (TARGET_AAPCS_BASED)
15423 return must_pass_in_stack_var_size (mode, type);
15424 else
15425 return must_pass_in_stack_var_size_or_pad (mode, type);
15429 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15430 Return true if an argument passed on the stack should be padded upwards,
15431 i.e. if the least-significant byte has useful data.
15432 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15433 aggregate types are placed in the lowest memory address. */
15435 bool
15436 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15438 if (!TARGET_AAPCS_BASED)
15439 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15441 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15442 return false;
15444 return true;
15448 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15449 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15450 register has useful data, and return the opposite if the most
15451 significant byte does. */
15453 bool
15454 arm_pad_reg_upward (enum machine_mode mode,
15455 tree type, int first ATTRIBUTE_UNUSED)
15457 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15459 /* For AAPCS, small aggregates, small fixed-point types,
15460 and small complex types are always padded upwards. */
15461 if (type)
15463 if ((AGGREGATE_TYPE_P (type)
15464 || TREE_CODE (type) == COMPLEX_TYPE
15465 || FIXED_POINT_TYPE_P (type))
15466 && int_size_in_bytes (type) <= 4)
15467 return true;
15469 else
15471 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15472 && GET_MODE_SIZE (mode) <= 4)
15473 return true;
15477 /* Otherwise, use default padding. */
15478 return !BYTES_BIG_ENDIAN;
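/* For illustration: on a big-endian AAPCS target a 3-byte structure
   passed in a register satisfies the aggregate test above
   (int_size_in_bytes == 3 <= 4) and is padded upwards, whereas an
   integer argument on the stack takes the INTEGRAL_TYPE_P path in
   arm_pad_arg_upward above and is not padded upwards.  */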
15481 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15482 assuming that the address in the base register is word aligned. */
15483 bool
15484 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15486 HOST_WIDE_INT max_offset;
15488 /* Offset must be a multiple of 4 in Thumb mode. */
15489 if (TARGET_THUMB2 && ((offset & 3) != 0))
15490 return false;
15492 if (TARGET_THUMB2)
15493 max_offset = 1020;
15494 else if (TARGET_ARM)
15495 max_offset = 255;
15496 else
15497 return false;
15499 return ((offset <= max_offset) && (offset >= -max_offset));
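/* For illustration: in Thumb-2 state an offset of 1020 is accepted,
   1022 is rejected (not a multiple of 4) and 1024 is rejected (out of
   range); in ARM state the limit is +/-255 with no alignment
   requirement, so 255 is accepted and 256 is rejected.  In Thumb-1
   state the function always returns false.  */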
15502 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15503 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15504 Assumes that the address in the base register RN is word aligned. Pattern
15505 guarantees that both memory accesses use the same base register,
15506 the offsets are constants within the range, and the gap between the offsets is 4.
15507 If reload is complete, check that the registers are legal. WBACK indicates whether
15508 address is updated. LOAD indicates whether memory access is load or store. */
15509 bool
15510 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15511 bool wback, bool load)
15513 unsigned int t, t2, n;
15515 if (!reload_completed)
15516 return true;
15518 if (!offset_ok_for_ldrd_strd (offset))
15519 return false;
15521 t = REGNO (rt);
15522 t2 = REGNO (rt2);
15523 n = REGNO (rn);
15525 if ((TARGET_THUMB2)
15526 && ((wback && (n == t || n == t2))
15527 || (t == SP_REGNUM)
15528 || (t == PC_REGNUM)
15529 || (t2 == SP_REGNUM)
15530 || (t2 == PC_REGNUM)
15531 || (!load && (n == PC_REGNUM))
15532 || (load && (t == t2))
15533 /* Triggers Cortex-M3 LDRD errata. */
15534 || (!wback && load && fix_cm3_ldrd && (n == t))))
15535 return false;
15537 if ((TARGET_ARM)
15538 && ((wback && (n == t || n == t2))
15539 || (t2 == PC_REGNUM)
15540 || (t % 2 != 0) /* First destination register is not even. */
15541 || (t2 != t + 1)
15542 /* PC can be used as base register (for offset addressing only),
15543 but it is deprecated. */
15544 || (n == PC_REGNUM)))
15545 return false;
15547 return true;
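/* For illustration: in ARM state the register pair {r2, r3} with a
   word-aligned base such as r4 passes the checks above (even first
   register, consecutive pair, base neither the PC nor written back),
   while {r1, r2} fails the "t % 2 != 0" test.  In Thumb-2 state almost
   any pair is acceptable provided SP and PC are avoided and, for a
   load, the two destination registers differ.  */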
15550 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15551 operand MEM's address contains an immediate offset from the base
15552 register and has no side effects, in which case it sets BASE and
15553 OFFSET accordingly. */
15554 static bool
15555 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15557 rtx addr;
15559 gcc_assert (base != NULL && offset != NULL);
15561 /* TODO: Handle more general memory operand patterns, such as
15562 PRE_DEC and PRE_INC. */
15564 if (side_effects_p (mem))
15565 return false;
15567 /* Can't deal with subregs. */
15568 if (GET_CODE (mem) == SUBREG)
15569 return false;
15571 gcc_assert (MEM_P (mem));
15573 *offset = const0_rtx;
15575 addr = XEXP (mem, 0);
15577 /* If addr isn't valid for DImode, then we can't handle it. */
15578 if (!arm_legitimate_address_p (DImode, addr,
15579 reload_in_progress || reload_completed))
15580 return false;
15582 if (REG_P (addr))
15584 *base = addr;
15585 return true;
15587 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15589 *base = XEXP (addr, 0);
15590 *offset = XEXP (addr, 1);
15591 return (REG_P (*base) && CONST_INT_P (*offset));
15594 return false;
15597 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15599 /* Called from a peephole2 to replace two word-size accesses with a
15600 single LDRD/STRD instruction. Returns true iff we can generate a
15601 new instruction sequence. That is, both accesses use the same base
15602 register and the gap between constant offsets is 4. This function
15603 may reorder its operands to match ldrd/strd RTL templates.
15604 OPERANDS are the operands found by the peephole matcher;
15605 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15606 corresponding memory operands. LOAD indicates whether the access
15607 is load or store. CONST_STORE indicates a store of constant
15608 integer values held in OPERANDS[4,5] and assumes that the pattern
15609 is 4 insns long, for the purpose of checking dead registers.
15610 COMMUTE indicates that register operands may be reordered. */
15611 bool
15612 gen_operands_ldrd_strd (rtx *operands, bool load,
15613 bool const_store, bool commute)
15615 int nops = 2;
15616 HOST_WIDE_INT offsets[2], offset;
15617 rtx base = NULL_RTX;
15618 rtx cur_base, cur_offset, tmp;
15619 int i, gap;
15620 HARD_REG_SET regset;
15622 gcc_assert (!const_store || !load);
15623 /* Check that the memory references are immediate offsets from the
15624 same base register. Extract the base register, the destination
15625 registers, and the corresponding memory offsets. */
15626 for (i = 0; i < nops; i++)
15628 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15629 return false;
15631 if (i == 0)
15632 base = cur_base;
15633 else if (REGNO (base) != REGNO (cur_base))
15634 return false;
15636 offsets[i] = INTVAL (cur_offset);
15637 if (GET_CODE (operands[i]) == SUBREG)
15639 tmp = SUBREG_REG (operands[i]);
15640 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15641 operands[i] = tmp;
15645 /* Make sure there is no dependency between the individual loads. */
15646 if (load && REGNO (operands[0]) == REGNO (base))
15647 return false; /* RAW */
15649 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15650 return false; /* WAW */
15652 /* If the same input register is used in both stores
15653 when storing different constants, try to find a free register.
15654 For example, the code
15655 mov r0, 0
15656 str r0, [r2]
15657 mov r0, 1
15658 str r0, [r2, #4]
15659 can be transformed into
15660 mov r1, 0
15661 strd r1, r0, [r2]
15662 in Thumb mode assuming that r1 is free. */
15663 if (const_store
15664 && REGNO (operands[0]) == REGNO (operands[1])
15665 && INTVAL (operands[4]) != INTVAL (operands[5]))
15667 if (TARGET_THUMB2)
15669 CLEAR_HARD_REG_SET (regset);
15670 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15671 if (tmp == NULL_RTX)
15672 return false;
15674 /* Use the new register in the first load to ensure that
15675 if the original input register is not dead after peephole,
15676 then it will have the correct constant value. */
15677 operands[0] = tmp;
15679 else if (TARGET_ARM)
15681 return false;
15682 int regno = REGNO (operands[0]);
15683 if (!peep2_reg_dead_p (4, operands[0]))
15685 /* When the input register is even and is not dead after the
15686 pattern, it has to hold the second constant but we cannot
15687 form a legal STRD in ARM mode with this register as the second
15688 register. */
15689 if (regno % 2 == 0)
15690 return false;
15692 /* Is regno-1 free? */
15693 SET_HARD_REG_SET (regset);
15694 CLEAR_HARD_REG_BIT(regset, regno - 1);
15695 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15696 if (tmp == NULL_RTX)
15697 return false;
15699 operands[0] = tmp;
15701 else
15703 /* Find a DImode register. */
15704 CLEAR_HARD_REG_SET (regset);
15705 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15706 if (tmp != NULL_RTX)
15708 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15709 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15711 else
15713 /* Can we use the input register to form a DI register? */
15714 SET_HARD_REG_SET (regset);
15715 CLEAR_HARD_REG_BIT(regset,
15716 regno % 2 == 0 ? regno + 1 : regno - 1);
15717 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15718 if (tmp == NULL_RTX)
15719 return false;
15720 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15724 gcc_assert (operands[0] != NULL_RTX);
15725 gcc_assert (operands[1] != NULL_RTX);
15726 gcc_assert (REGNO (operands[0]) % 2 == 0);
15727 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15731 /* Make sure the instructions are ordered with lower memory access first. */
15732 if (offsets[0] > offsets[1])
15734 gap = offsets[0] - offsets[1];
15735 offset = offsets[1];
15737 /* Swap the instructions such that lower memory is accessed first. */
15738 SWAP_RTX (operands[0], operands[1]);
15739 SWAP_RTX (operands[2], operands[3]);
15740 if (const_store)
15741 SWAP_RTX (operands[4], operands[5]);
15743 else
15745 gap = offsets[1] - offsets[0];
15746 offset = offsets[0];
15749 /* Make sure accesses are to consecutive memory locations. */
15750 if (gap != 4)
15751 return false;
15753 /* Make sure we generate legal instructions. */
15754 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15755 false, load))
15756 return true;
15758 /* In Thumb state, where registers are almost unconstrained, there
15759 is little hope of fixing it. */
15760 if (TARGET_THUMB2)
15761 return false;
15763 if (load && commute)
15765 /* Try reordering registers. */
15766 SWAP_RTX (operands[0], operands[1]);
15767 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15768 false, load))
15769 return true;
15772 if (const_store)
15774 /* If input registers are dead after this pattern, they can be
15775 reordered or replaced by other registers that are free in the
15776 current pattern. */
15777 if (!peep2_reg_dead_p (4, operands[0])
15778 || !peep2_reg_dead_p (4, operands[1]))
15779 return false;
15781 /* Try to reorder the input registers. */
15782 /* For example, the code
15783 mov r0, 0
15784 mov r1, 1
15785 str r1, [r2]
15786 str r0, [r2, #4]
15787 can be transformed into
15788 mov r1, 0
15789 mov r0, 1
15790 strd r0, [r2]
15792 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15793 false, false))
15795 SWAP_RTX (operands[0], operands[1]);
15796 return true;
15799 /* Try to find a free DI register. */
15800 CLEAR_HARD_REG_SET (regset);
15801 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15802 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15803 while (true)
15805 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15806 if (tmp == NULL_RTX)
15807 return false;
15809 /* DREG must be an even-numbered register in DImode.
15810 Split it into SI registers. */
15811 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15812 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15813 gcc_assert (operands[0] != NULL_RTX);
15814 gcc_assert (operands[1] != NULL_RTX);
15815 gcc_assert (REGNO (operands[0]) % 2 == 0);
15816 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15818 return (operands_ok_ldrd_strd (operands[0], operands[1],
15819 base, offset,
15820 false, load));
15824 return false;
15826 #undef SWAP_RTX
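/* A sketch of the effect of this peephole for a Thumb-2 load pair
   (register numbers chosen purely for illustration):

     ldr  r0, [r3]
     ldr  r1, [r3, #4]    -->    ldrd  r0, r1, [r3]

   Both accesses use the same base register and the offsets differ by
   exactly 4, so gen_operands_ldrd_strd accepts the operands as they
   stand; had the accesses appeared in the opposite order, the operands
   would first have been swapped so that the lower address comes first.  */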
15831 /* Print a symbolic form of X to the debug file, F. */
15832 static void
15833 arm_print_value (FILE *f, rtx x)
15835 switch (GET_CODE (x))
15837 case CONST_INT:
15838 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15839 return;
15841 case CONST_DOUBLE:
15842 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15843 return;
15845 case CONST_VECTOR:
15847 int i;
15849 fprintf (f, "<");
15850 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15852 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15853 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15854 fputc (',', f);
15856 fprintf (f, ">");
15858 return;
15860 case CONST_STRING:
15861 fprintf (f, "\"%s\"", XSTR (x, 0));
15862 return;
15864 case SYMBOL_REF:
15865 fprintf (f, "`%s'", XSTR (x, 0));
15866 return;
15868 case LABEL_REF:
15869 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15870 return;
15872 case CONST:
15873 arm_print_value (f, XEXP (x, 0));
15874 return;
15876 case PLUS:
15877 arm_print_value (f, XEXP (x, 0));
15878 fprintf (f, "+");
15879 arm_print_value (f, XEXP (x, 1));
15880 return;
15882 case PC:
15883 fprintf (f, "pc");
15884 return;
15886 default:
15887 fprintf (f, "????");
15888 return;
15892 /* Routines for manipulation of the constant pool. */
15894 /* ARM instructions cannot load a large constant directly into a
15895 register; it has to come from a pc-relative load. The constant
15896 must therefore be placed in the addressable range of the pc
15897 relative load. Depending on the precise pc relative load
15898 instruction the range is somewhere between 256 bytes and 4k. This
15899 means that we often have to dump a constant inside a function, and
15900 generate code to branch around it.
15902 It is important to minimize this, since the branches will slow
15903 things down and make the code larger.
15905 Normally we can hide the table after an existing unconditional
15906 branch so that there is no interruption of the flow, but in the
15907 worst case the code looks like this:
15909 ldr rn, L1
15911 b L2
15912 align
15913 L1: .long value
15917 ldr rn, L3
15919 b L4
15920 align
15921 L3: .long value
15925 We fix this by performing a scan after scheduling, which notices
15926 which instructions need to have their operands fetched from the
15927 constant table and builds the table.
15929 The algorithm starts by building a table of all the constants that
15930 need fixing up and all the natural barriers in the function (places
15931 where a constant table can be dropped without breaking the flow).
15932 For each fixup we note how far the pc-relative replacement will be
15933 able to reach and the offset of the instruction into the function.
15935 Having built the table we then group the fixes together to form
15936 tables that are as large as possible (subject to addressing
15937 constraints) and emit each table of constants after the last
15938 barrier that is within range of all the instructions in the group.
15939 If a group does not contain a barrier, then we forcibly create one
15940 by inserting a jump instruction into the flow. Once the table has
15941 been inserted, the insns are then modified to reference the
15942 relevant entry in the pool.
15944 Possible enhancements to the algorithm (not implemented) are:
15946 1) For some processors and object formats, there may be benefit in
15947 aligning the pools to the start of cache lines; this alignment
15948 would need to be taken into account when calculating addressability
15949 of a pool. */
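/* As a concrete illustration of the constraints involved: a fix whose
   insn lies at address A and whose pool_range attribute is R must have
   its constant emitted before address A + R - minipool_pad (see
   add_minipool_forward_ref below), while the same fix can reach a pool
   placed no earlier than A - neg_pool_range (see
   add_minipool_backward_ref).  The grouping pass then emits each pool
   after the last barrier that satisfies every fix in its group.  */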
15951 /* These typedefs are located at the start of this file, so that
15952 they can be used in the prototypes there. This comment is to
15953 remind readers of that fact so that the following structures
15954 can be understood more easily.
15956 typedef struct minipool_node Mnode;
15957 typedef struct minipool_fixup Mfix; */
15959 struct minipool_node
15961 /* Doubly linked chain of entries. */
15962 Mnode * next;
15963 Mnode * prev;
15964 /* The maximum offset into the code that this entry can be placed. While
15965 pushing fixes for forward references, all entries are sorted in order
15966 of increasing max_address. */
15967 HOST_WIDE_INT max_address;
15968 /* Similarly for an entry inserted for a backwards ref. */
15969 HOST_WIDE_INT min_address;
15970 /* The number of fixes referencing this entry. This can become zero
15971 if we "unpush" an entry. In this case we ignore the entry when we
15972 come to emit the code. */
15973 int refcount;
15974 /* The offset from the start of the minipool. */
15975 HOST_WIDE_INT offset;
15976 /* The value in the table. */
15977 rtx value;
15978 /* The mode of value. */
15979 enum machine_mode mode;
15980 /* The size of the value. With iWMMXt enabled
15981 sizes > 4 also imply an alignment of 8-bytes. */
15982 int fix_size;
15985 struct minipool_fixup
15987 Mfix * next;
15988 rtx insn;
15989 HOST_WIDE_INT address;
15990 rtx * loc;
15991 enum machine_mode mode;
15992 int fix_size;
15993 rtx value;
15994 Mnode * minipool;
15995 HOST_WIDE_INT forwards;
15996 HOST_WIDE_INT backwards;
15999 /* Fixes less than a word need padding out to a word boundary. */
16000 #define MINIPOOL_FIX_SIZE(mode) \
16001 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
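/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte value is
   padded out to a word), MINIPOOL_FIX_SIZE (SImode) is 4 and
   MINIPOOL_FIX_SIZE (DImode) is 8.  */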
16003 static Mnode * minipool_vector_head;
16004 static Mnode * minipool_vector_tail;
16005 static rtx minipool_vector_label;
16006 static int minipool_pad;
16008 /* The linked list of all minipool fixes required for this function. */
16009 Mfix * minipool_fix_head;
16010 Mfix * minipool_fix_tail;
16011 /* The fix entry for the current minipool, once it has been placed. */
16012 Mfix * minipool_barrier;
16014 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16015 #define JUMP_TABLES_IN_TEXT_SECTION 0
16016 #endif
16018 static HOST_WIDE_INT
16019 get_jump_table_size (rtx insn)
16021 /* ADDR_VECs only take room if read-only data goes into the text
16022 section. */
16023 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16025 rtx body = PATTERN (insn);
16026 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16027 HOST_WIDE_INT size;
16028 HOST_WIDE_INT modesize;
16030 modesize = GET_MODE_SIZE (GET_MODE (body));
16031 size = modesize * XVECLEN (body, elt);
16032 switch (modesize)
16034 case 1:
16035 /* Round up size of TBB table to a halfword boundary. */
16036 size = (size + 1) & ~(HOST_WIDE_INT)1;
16037 break;
16038 case 2:
16039 /* No padding necessary for TBH. */
16040 break;
16041 case 4:
16042 /* Add two bytes for alignment on Thumb. */
16043 if (TARGET_THUMB)
16044 size += 2;
16045 break;
16046 default:
16047 gcc_unreachable ();
16049 return size;
16052 return 0;
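/* Worked example of the sizes computed above: an ADDR_DIFF_VEC with
   five QImode entries (a TBB table) occupies 5 bytes, rounded up to 6;
   five HImode entries (TBH) take 10 bytes with no padding; and five
   SImode entries on Thumb take 5 * 4 + 2 = 22 bytes because of the two
   extra alignment bytes.  */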
16055 /* Return the maximum amount of padding that will be inserted before
16056 label LABEL. */
16058 static HOST_WIDE_INT
16059 get_label_padding (rtx label)
16061 HOST_WIDE_INT align, min_insn_size;
16063 align = 1 << label_to_alignment (label);
16064 min_insn_size = TARGET_THUMB ? 2 : 4;
16065 return align > min_insn_size ? align - min_insn_size : 0;
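/* For example, a label aligned to an 8-byte boundary may be preceded
   by up to 8 - 2 = 6 bytes of padding in Thumb state, where the
   minimum insn length is 2, and by up to 4 bytes in ARM state.  */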
16068 /* Move a minipool fix MP from its current location to before MAX_MP.
16069 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16070 constraints may need updating. */
16071 static Mnode *
16072 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16073 HOST_WIDE_INT max_address)
16075 /* The code below assumes these are different. */
16076 gcc_assert (mp != max_mp);
16078 if (max_mp == NULL)
16080 if (max_address < mp->max_address)
16081 mp->max_address = max_address;
16083 else
16085 if (max_address > max_mp->max_address - mp->fix_size)
16086 mp->max_address = max_mp->max_address - mp->fix_size;
16087 else
16088 mp->max_address = max_address;
16090 /* Unlink MP from its current position. Since max_mp is non-null,
16091 mp->prev must be non-null. */
16092 mp->prev->next = mp->next;
16093 if (mp->next != NULL)
16094 mp->next->prev = mp->prev;
16095 else
16096 minipool_vector_tail = mp->prev;
16098 /* Re-insert it before MAX_MP. */
16099 mp->next = max_mp;
16100 mp->prev = max_mp->prev;
16101 max_mp->prev = mp;
16103 if (mp->prev != NULL)
16104 mp->prev->next = mp;
16105 else
16106 minipool_vector_head = mp;
16109 /* Save the new entry. */
16110 max_mp = mp;
16112 /* Scan over the preceding entries and adjust their addresses as
16113 required. */
16114 while (mp->prev != NULL
16115 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16117 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16118 mp = mp->prev;
16121 return max_mp;
16124 /* Add a constant to the minipool for a forward reference. Returns the
16125 node added or NULL if the constant will not fit in this pool. */
16126 static Mnode *
16127 add_minipool_forward_ref (Mfix *fix)
16129 /* If set, max_mp is the first pool_entry that has a lower
16130 constraint than the one we are trying to add. */
16131 Mnode * max_mp = NULL;
16132 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16133 Mnode * mp;
16135 /* If the minipool starts before the end of FIX->INSN then this FIX
16136 can not be placed into the current pool. Furthermore, adding the
16137 new constant pool entry may cause the pool to start FIX_SIZE bytes
16138 earlier. */
16139 if (minipool_vector_head &&
16140 (fix->address + get_attr_length (fix->insn)
16141 >= minipool_vector_head->max_address - fix->fix_size))
16142 return NULL;
16144 /* Scan the pool to see if a constant with the same value has
16145 already been added. While we are doing this, also note the
16146 location where we must insert the constant if it doesn't already
16147 exist. */
16148 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16150 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16151 && fix->mode == mp->mode
16152 && (!LABEL_P (fix->value)
16153 || (CODE_LABEL_NUMBER (fix->value)
16154 == CODE_LABEL_NUMBER (mp->value)))
16155 && rtx_equal_p (fix->value, mp->value))
16157 /* More than one fix references this entry. */
16158 mp->refcount++;
16159 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16162 /* Note the insertion point if necessary. */
16163 if (max_mp == NULL
16164 && mp->max_address > max_address)
16165 max_mp = mp;
16167 /* If we are inserting an 8-byte aligned quantity and
16168 we have not already found an insertion point, then
16169 make sure that all such 8-byte aligned quantities are
16170 placed at the start of the pool. */
16171 if (ARM_DOUBLEWORD_ALIGN
16172 && max_mp == NULL
16173 && fix->fix_size >= 8
16174 && mp->fix_size < 8)
16176 max_mp = mp;
16177 max_address = mp->max_address;
16181 /* The value is not currently in the minipool, so we need to create
16182 a new entry for it. If MAX_MP is NULL, the entry will be put on
16183 the end of the list since the placement is less constrained than
16184 any existing entry. Otherwise, we insert the new fix before
16185 MAX_MP and, if necessary, adjust the constraints on the other
16186 entries. */
16187 mp = XNEW (Mnode);
16188 mp->fix_size = fix->fix_size;
16189 mp->mode = fix->mode;
16190 mp->value = fix->value;
16191 mp->refcount = 1;
16192 /* Not yet required for a backwards ref. */
16193 mp->min_address = -65536;
16195 if (max_mp == NULL)
16197 mp->max_address = max_address;
16198 mp->next = NULL;
16199 mp->prev = minipool_vector_tail;
16201 if (mp->prev == NULL)
16203 minipool_vector_head = mp;
16204 minipool_vector_label = gen_label_rtx ();
16206 else
16207 mp->prev->next = mp;
16209 minipool_vector_tail = mp;
16211 else
16213 if (max_address > max_mp->max_address - mp->fix_size)
16214 mp->max_address = max_mp->max_address - mp->fix_size;
16215 else
16216 mp->max_address = max_address;
16218 mp->next = max_mp;
16219 mp->prev = max_mp->prev;
16220 max_mp->prev = mp;
16221 if (mp->prev != NULL)
16222 mp->prev->next = mp;
16223 else
16224 minipool_vector_head = mp;
16227 /* Save the new entry. */
16228 max_mp = mp;
16230 /* Scan over the preceding entries and adjust their addresses as
16231 required. */
16232 while (mp->prev != NULL
16233 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16235 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16236 mp = mp->prev;
16239 return max_mp;
16242 static Mnode *
16243 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16244 HOST_WIDE_INT min_address)
16246 HOST_WIDE_INT offset;
16248 /* The code below assumes these are different. */
16249 gcc_assert (mp != min_mp);
16251 if (min_mp == NULL)
16253 if (min_address > mp->min_address)
16254 mp->min_address = min_address;
16256 else
16258 /* We will adjust this below if it is too loose. */
16259 mp->min_address = min_address;
16261 /* Unlink MP from its current position. Since min_mp is non-null,
16262 mp->next must be non-null. */
16263 mp->next->prev = mp->prev;
16264 if (mp->prev != NULL)
16265 mp->prev->next = mp->next;
16266 else
16267 minipool_vector_head = mp->next;
16269 /* Reinsert it after MIN_MP. */
16270 mp->prev = min_mp;
16271 mp->next = min_mp->next;
16272 min_mp->next = mp;
16273 if (mp->next != NULL)
16274 mp->next->prev = mp;
16275 else
16276 minipool_vector_tail = mp;
16279 min_mp = mp;
16281 offset = 0;
16282 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16284 mp->offset = offset;
16285 if (mp->refcount > 0)
16286 offset += mp->fix_size;
16288 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16289 mp->next->min_address = mp->min_address + mp->fix_size;
16292 return min_mp;
16295 /* Add a constant to the minipool for a backward reference. Returns the
16296 node added or NULL if the constant will not fit in this pool.
16298 Note that the code for insertion for a backwards reference can be
16299 somewhat confusing because the calculated offsets for each fix do
16300 not take into account the size of the pool (which is still under
16301 construction). */
16302 static Mnode *
16303 add_minipool_backward_ref (Mfix *fix)
16305 /* If set, min_mp is the last pool_entry that has a lower constraint
16306 than the one we are trying to add. */
16307 Mnode *min_mp = NULL;
16308 /* This can be negative, since it is only a constraint. */
16309 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16310 Mnode *mp;
16312 /* If we can't reach the current pool from this insn, or if we can't
16313 insert this entry at the end of the pool without pushing other
16314 fixes out of range, then we don't try. This ensures that we
16315 can't fail later on. */
16316 if (min_address >= minipool_barrier->address
16317 || (minipool_vector_tail->min_address + fix->fix_size
16318 >= minipool_barrier->address))
16319 return NULL;
16321 /* Scan the pool to see if a constant with the same value has
16322 already been added. While we are doing this, also note the
16323 location where we must insert the constant if it doesn't already
16324 exist. */
16325 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16327 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16328 && fix->mode == mp->mode
16329 && (!LABEL_P (fix->value)
16330 || (CODE_LABEL_NUMBER (fix->value)
16331 == CODE_LABEL_NUMBER (mp->value)))
16332 && rtx_equal_p (fix->value, mp->value)
16333 /* Check that there is enough slack to move this entry to the
16334 end of the table (this is conservative). */
16335 && (mp->max_address
16336 > (minipool_barrier->address
16337 + minipool_vector_tail->offset
16338 + minipool_vector_tail->fix_size)))
16340 mp->refcount++;
16341 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16344 if (min_mp != NULL)
16345 mp->min_address += fix->fix_size;
16346 else
16348 /* Note the insertion point if necessary. */
16349 if (mp->min_address < min_address)
16351 /* For now, we do not allow nodes that require 8-byte alignment
16352 to be inserted anywhere but at the start of the pool. */
16353 if (ARM_DOUBLEWORD_ALIGN
16354 && fix->fix_size >= 8 && mp->fix_size < 8)
16355 return NULL;
16356 else
16357 min_mp = mp;
16359 else if (mp->max_address
16360 < minipool_barrier->address + mp->offset + fix->fix_size)
16362 /* Inserting before this entry would push the fix beyond
16363 its maximum address (which can happen if we have
16364 re-located a forwards fix); force the new fix to come
16365 after it. */
16366 if (ARM_DOUBLEWORD_ALIGN
16367 && fix->fix_size >= 8 && mp->fix_size < 8)
16368 return NULL;
16369 else
16371 min_mp = mp;
16372 min_address = mp->min_address + fix->fix_size;
16375 /* Do not insert a non-8-byte aligned quantity before 8-byte
16376 aligned quantities. */
16377 else if (ARM_DOUBLEWORD_ALIGN
16378 && fix->fix_size < 8
16379 && mp->fix_size >= 8)
16381 min_mp = mp;
16382 min_address = mp->min_address + fix->fix_size;
16387 /* We need to create a new entry. */
16388 mp = XNEW (Mnode);
16389 mp->fix_size = fix->fix_size;
16390 mp->mode = fix->mode;
16391 mp->value = fix->value;
16392 mp->refcount = 1;
16393 mp->max_address = minipool_barrier->address + 65536;
16395 mp->min_address = min_address;
16397 if (min_mp == NULL)
16399 mp->prev = NULL;
16400 mp->next = minipool_vector_head;
16402 if (mp->next == NULL)
16404 minipool_vector_tail = mp;
16405 minipool_vector_label = gen_label_rtx ();
16407 else
16408 mp->next->prev = mp;
16410 minipool_vector_head = mp;
16412 else
16414 mp->next = min_mp->next;
16415 mp->prev = min_mp;
16416 min_mp->next = mp;
16418 if (mp->next != NULL)
16419 mp->next->prev = mp;
16420 else
16421 minipool_vector_tail = mp;
16424 /* Save the new entry. */
16425 min_mp = mp;
16427 if (mp->prev)
16428 mp = mp->prev;
16429 else
16430 mp->offset = 0;
16432 /* Scan over the following entries and adjust their offsets. */
16433 while (mp->next != NULL)
16435 if (mp->next->min_address < mp->min_address + mp->fix_size)
16436 mp->next->min_address = mp->min_address + mp->fix_size;
16438 if (mp->refcount)
16439 mp->next->offset = mp->offset + mp->fix_size;
16440 else
16441 mp->next->offset = mp->offset;
16443 mp = mp->next;
16446 return min_mp;
16449 static void
16450 assign_minipool_offsets (Mfix *barrier)
16452 HOST_WIDE_INT offset = 0;
16453 Mnode *mp;
16455 minipool_barrier = barrier;
16457 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16459 mp->offset = offset;
16461 if (mp->refcount > 0)
16462 offset += mp->fix_size;
16466 /* Output the literal table. */
16467 static void
16468 dump_minipool (rtx scan)
16470 Mnode * mp;
16471 Mnode * nmp;
16472 int align64 = 0;
16474 if (ARM_DOUBLEWORD_ALIGN)
16475 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16476 if (mp->refcount > 0 && mp->fix_size >= 8)
16478 align64 = 1;
16479 break;
16482 if (dump_file)
16483 fprintf (dump_file,
16484 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16485 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16487 scan = emit_label_after (gen_label_rtx (), scan);
16488 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16489 scan = emit_label_after (minipool_vector_label, scan);
16491 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16493 if (mp->refcount > 0)
16495 if (dump_file)
16497 fprintf (dump_file,
16498 ";; Offset %u, min %ld, max %ld ",
16499 (unsigned) mp->offset, (unsigned long) mp->min_address,
16500 (unsigned long) mp->max_address);
16501 arm_print_value (dump_file, mp->value);
16502 fputc ('\n', dump_file);
16505 switch (mp->fix_size)
16507 #ifdef HAVE_consttable_1
16508 case 1:
16509 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16510 break;
16512 #endif
16513 #ifdef HAVE_consttable_2
16514 case 2:
16515 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16516 break;
16518 #endif
16519 #ifdef HAVE_consttable_4
16520 case 4:
16521 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16522 break;
16524 #endif
16525 #ifdef HAVE_consttable_8
16526 case 8:
16527 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16528 break;
16530 #endif
16531 #ifdef HAVE_consttable_16
16532 case 16:
16533 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16534 break;
16536 #endif
16537 default:
16538 gcc_unreachable ();
16542 nmp = mp->next;
16543 free (mp);
16546 minipool_vector_head = minipool_vector_tail = NULL;
16547 scan = emit_insn_after (gen_consttable_end (), scan);
16548 scan = emit_barrier_after (scan);
16551 /* Return the cost of forcibly inserting a barrier after INSN. */
16552 static int
16553 arm_barrier_cost (rtx insn)
16555 /* Basing the location of the pool on the loop depth is preferable,
16556 but at the moment, the basic block information seems to be
16557 corrupted by this stage of the compilation. */
16558 int base_cost = 50;
16559 rtx next = next_nonnote_insn (insn);
16561 if (next != NULL && LABEL_P (next))
16562 base_cost -= 20;
16564 switch (GET_CODE (insn))
16566 case CODE_LABEL:
16567 /* It will always be better to place the table before the label, rather
16568 than after it. */
16569 return 50;
16571 case INSN:
16572 case CALL_INSN:
16573 return base_cost;
16575 case JUMP_INSN:
16576 return base_cost - 10;
16578 default:
16579 return base_cost + 10;
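/* To illustrate the cost model above: the base cost is 50, dropping by
   20 when the following insn is a label; a JUMP_INSN is a further 10
   cheaper, so a jump immediately followed by a label costs only 20.
   Lower costs are preferred by create_fix_barrier below when choosing
   where to force a barrier.  */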
16583 /* Find the best place in the insn stream in the range
16584 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16585 Create the barrier by inserting a jump and add a new fix entry for
16586 it. */
16587 static Mfix *
16588 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16590 HOST_WIDE_INT count = 0;
16591 rtx barrier;
16592 rtx from = fix->insn;
16593 /* The instruction after which we will insert the jump. */
16594 rtx selected = NULL;
16595 int selected_cost;
16596 /* The address at which the jump instruction will be placed. */
16597 HOST_WIDE_INT selected_address;
16598 Mfix * new_fix;
16599 HOST_WIDE_INT max_count = max_address - fix->address;
16600 rtx label = gen_label_rtx ();
16602 selected_cost = arm_barrier_cost (from);
16603 selected_address = fix->address;
16605 while (from && count < max_count)
16607 rtx tmp;
16608 int new_cost;
16610 /* This code shouldn't have been called if there was a natural barrier
16611 within range. */
16612 gcc_assert (!BARRIER_P (from));
16614 /* Count the length of this insn. This must stay in sync with the
16615 code that pushes minipool fixes. */
16616 if (LABEL_P (from))
16617 count += get_label_padding (from);
16618 else
16619 count += get_attr_length (from);
16621 /* If there is a jump table, add its length. */
16622 if (tablejump_p (from, NULL, &tmp))
16624 count += get_jump_table_size (tmp);
16626 /* Jump tables aren't in a basic block, so base the cost on
16627 the dispatch insn. If we select this location, we will
16628 still put the pool after the table. */
16629 new_cost = arm_barrier_cost (from);
16631 if (count < max_count
16632 && (!selected || new_cost <= selected_cost))
16634 selected = tmp;
16635 selected_cost = new_cost;
16636 selected_address = fix->address + count;
16639 /* Continue after the dispatch table. */
16640 from = NEXT_INSN (tmp);
16641 continue;
16644 new_cost = arm_barrier_cost (from);
16646 if (count < max_count
16647 && (!selected || new_cost <= selected_cost))
16649 selected = from;
16650 selected_cost = new_cost;
16651 selected_address = fix->address + count;
16654 from = NEXT_INSN (from);
16657 /* Make sure that we found a place to insert the jump. */
16658 gcc_assert (selected);
16660 /* Make sure we do not split a call and its corresponding
16661 CALL_ARG_LOCATION note. */
16662 if (CALL_P (selected))
16664 rtx next = NEXT_INSN (selected);
16665 if (next && NOTE_P (next)
16666 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16667 selected = next;
16670 /* Create a new JUMP_INSN that branches around a barrier. */
16671 from = emit_jump_insn_after (gen_jump (label), selected);
16672 JUMP_LABEL (from) = label;
16673 barrier = emit_barrier_after (from);
16674 emit_label_after (label, barrier);
16676 /* Create a minipool barrier entry for the new barrier. */
16677 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16678 new_fix->insn = barrier;
16679 new_fix->address = selected_address;
16680 new_fix->next = fix->next;
16681 fix->next = new_fix;
16683 return new_fix;
16686 /* Record that there is a natural barrier in the insn stream at
16687 ADDRESS. */
16688 static void
16689 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16691 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16693 fix->insn = insn;
16694 fix->address = address;
16696 fix->next = NULL;
16697 if (minipool_fix_head != NULL)
16698 minipool_fix_tail->next = fix;
16699 else
16700 minipool_fix_head = fix;
16702 minipool_fix_tail = fix;
16705 /* Record INSN, which will need fixing up to load a value from the
16706 minipool. ADDRESS is the offset of the insn since the start of the
16707 function; LOC is a pointer to the part of the insn which requires
16708 fixing; VALUE is the constant that must be loaded, which is of type
16709 MODE. */
16710 static void
16711 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16712 enum machine_mode mode, rtx value)
16714 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16716 fix->insn = insn;
16717 fix->address = address;
16718 fix->loc = loc;
16719 fix->mode = mode;
16720 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16721 fix->value = value;
16722 fix->forwards = get_attr_pool_range (insn);
16723 fix->backwards = get_attr_neg_pool_range (insn);
16724 fix->minipool = NULL;
16726 /* If an insn doesn't have a range defined for it, then it isn't
16727 expecting to be reworked by this code. Better to stop now than
16728 to generate duff assembly code. */
16729 gcc_assert (fix->forwards || fix->backwards);
16731 /* If an entry requires 8-byte alignment then assume all constant pools
16732 require 4 bytes of padding. Trying to do this later on a per-pool
16733 basis is awkward because existing pool entries have to be modified. */
16734 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16735 minipool_pad = 4;
16737 if (dump_file)
16739 fprintf (dump_file,
16740 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16741 GET_MODE_NAME (mode),
16742 INSN_UID (insn), (unsigned long) address,
16743 -1 * (long)fix->backwards, (long)fix->forwards);
16744 arm_print_value (dump_file, fix->value);
16745 fprintf (dump_file, "\n");
16748 /* Add it to the chain of fixes. */
16749 fix->next = NULL;
16751 if (minipool_fix_head != NULL)
16752 minipool_fix_tail->next = fix;
16753 else
16754 minipool_fix_head = fix;
16756 minipool_fix_tail = fix;
16759 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16760 Returns the number of insns needed, or 99 if we always want to synthesize
16761 the value. */
16763 arm_max_const_double_inline_cost ()
16765 /* Let the value get synthesized to avoid the use of literal pools. */
16766 if (arm_disable_literal_pool)
16767 return 99;
16769 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16772 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16773 Returns the number of insns needed, or 99 if we don't know how to
16774 do it. */
16776 arm_const_double_inline_cost (rtx val)
16778 rtx lowpart, highpart;
16779 enum machine_mode mode;
16781 mode = GET_MODE (val);
16783 if (mode == VOIDmode)
16784 mode = DImode;
16786 gcc_assert (GET_MODE_SIZE (mode) == 8);
16788 lowpart = gen_lowpart (SImode, val);
16789 highpart = gen_highpart_mode (SImode, mode, val);
16791 gcc_assert (CONST_INT_P (lowpart));
16792 gcc_assert (CONST_INT_P (highpart));
16794 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16795 NULL_RTX, NULL_RTX, 0, 0)
16796 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16797 NULL_RTX, NULL_RTX, 0, 0));
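/* For example, for the DImode constant 0x0000000100000001 both SImode
   halves are 1, each of which arm_gen_constant can build with a single
   instruction, so the cost is 2.  That is within
   arm_max_const_double_inline_cost (3 or 4), so such a constant is
   cheap enough to synthesize inline rather than place in a literal
   pool.  */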
16800 /* Return true if it is worthwhile to split a 64-bit constant into two
16801 32-bit operations. This is the case if optimizing for size, or
16802 if we have load delay slots, or if one 32-bit part can be done with
16803 a single data operation. */
16804 bool
16805 arm_const_double_by_parts (rtx val)
16807 enum machine_mode mode = GET_MODE (val);
16808 rtx part;
16810 if (optimize_size || arm_ld_sched)
16811 return true;
16813 if (mode == VOIDmode)
16814 mode = DImode;
16816 part = gen_highpart_mode (SImode, mode, val);
16818 gcc_assert (CONST_INT_P (part));
16820 if (const_ok_for_arm (INTVAL (part))
16821 || const_ok_for_arm (~INTVAL (part)))
16822 return true;
16824 part = gen_lowpart (SImode, val);
16826 gcc_assert (CONST_INT_P (part));
16828 if (const_ok_for_arm (INTVAL (part))
16829 || const_ok_for_arm (~INTVAL (part)))
16830 return true;
16832 return false;
16835 /* Return true if it is possible to inline both the high and low parts
16836 of a 64-bit constant into 32-bit data processing instructions. */
16837 bool
16838 arm_const_double_by_immediates (rtx val)
16840 enum machine_mode mode = GET_MODE (val);
16841 rtx part;
16843 if (mode == VOIDmode)
16844 mode = DImode;
16846 part = gen_highpart_mode (SImode, mode, val);
16848 gcc_assert (CONST_INT_P (part));
16850 if (!const_ok_for_arm (INTVAL (part)))
16851 return false;
16853 part = gen_lowpart (SImode, val);
16855 gcc_assert (CONST_INT_P (part));
16857 if (!const_ok_for_arm (INTVAL (part)))
16858 return false;
16860 return true;
16863 /* Scan INSN and note any of its operands that need fixing.
16864 If DO_PUSHES is false we do not actually push any of the fixups
16865 needed. */
16866 static void
16867 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16869 int opno;
16871 extract_insn (insn);
16873 if (!constrain_operands (1))
16874 fatal_insn_not_found (insn);
16876 if (recog_data.n_alternatives == 0)
16877 return;
16879 /* Fill in recog_op_alt with information about the constraints of
16880 this insn. */
16881 preprocess_constraints (insn);
16883 const operand_alternative *op_alt = which_op_alt ();
16884 for (opno = 0; opno < recog_data.n_operands; opno++)
16886 /* Things we need to fix can only occur in inputs. */
16887 if (recog_data.operand_type[opno] != OP_IN)
16888 continue;
16890 /* If this alternative is a memory reference, then any mention
16891 of constants in this alternative is really to fool reload
16892 into allowing us to accept one there. We need to fix them up
16893 now so that we output the right code. */
16894 if (op_alt[opno].memory_ok)
16896 rtx op = recog_data.operand[opno];
16898 if (CONSTANT_P (op))
16900 if (do_pushes)
16901 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16902 recog_data.operand_mode[opno], op);
16904 else if (MEM_P (op)
16905 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16906 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16908 if (do_pushes)
16910 rtx cop = avoid_constant_pool_reference (op);
16912 /* Casting the address of something to a mode narrower
16913 than a word can cause avoid_constant_pool_reference()
16914 to return the pool reference itself. That's no good to
16915 us here. Let's just hope that we can use the
16916 constant pool value directly. */
16917 if (op == cop)
16918 cop = get_pool_constant (XEXP (op, 0));
16920 push_minipool_fix (insn, address,
16921 recog_data.operand_loc[opno],
16922 recog_data.operand_mode[opno], cop);
16929 return;
16932 /* Rewrite move insn into subtract of 0 if the condition codes will
16933 be useful in next conditional jump insn. */
16935 static void
16936 thumb1_reorg (void)
16938 basic_block bb;
16940 FOR_EACH_BB_FN (bb, cfun)
16942 rtx dest, src;
16943 rtx pat, op0, set = NULL;
16944 rtx prev, insn = BB_END (bb);
16945 bool insn_clobbered = false;
16947 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16948 insn = PREV_INSN (insn);
16950 /* Find the last cbranchsi4_insn in basic block BB. */
16951 if (insn == BB_HEAD (bb)
16952 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16953 continue;
16955 /* Get the register with which we are comparing. */
16956 pat = PATTERN (insn);
16957 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16959 /* Find the first flag setting insn before INSN in basic block BB. */
16960 gcc_assert (insn != BB_HEAD (bb));
16961 for (prev = PREV_INSN (insn);
16962 (!insn_clobbered
16963 && prev != BB_HEAD (bb)
16964 && (NOTE_P (prev)
16965 || DEBUG_INSN_P (prev)
16966 || ((set = single_set (prev)) != NULL
16967 && get_attr_conds (prev) == CONDS_NOCOND)));
16968 prev = PREV_INSN (prev))
16970 if (reg_set_p (op0, prev))
16971 insn_clobbered = true;
16974 /* Skip if op0 is clobbered by insn other than prev. */
16975 if (insn_clobbered)
16976 continue;
16978 if (!set)
16979 continue;
16981 dest = SET_DEST (set);
16982 src = SET_SRC (set);
16983 if (!low_register_operand (dest, SImode)
16984 || !low_register_operand (src, SImode))
16985 continue;
16987 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16988 in INSN. Both src and dest of the move insn are checked. */
16989 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16991 dest = copy_rtx (dest);
16992 src = copy_rtx (src);
16993 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16994 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16995 INSN_CODE (prev) = -1;
16996 /* Set test register in INSN to dest. */
16997 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16998 INSN_CODE (insn) = -1;
17003 /* Convert instructions to their cc-clobbering variant if possible, since
17004 that allows us to use smaller encodings. */
17006 static void
17007 thumb2_reorg (void)
17009 basic_block bb;
17010 regset_head live;
17012 INIT_REG_SET (&live);
17014 /* We are freeing block_for_insn in the toplev to keep compatibility
17015 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17016 compute_bb_for_insn ();
17017 df_analyze ();
17019 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17021 FOR_EACH_BB_FN (bb, cfun)
17023 if (current_tune->disparage_flag_setting_t16_encodings
17024 && optimize_bb_for_speed_p (bb))
17025 continue;
17027 rtx insn;
17028 Convert_Action action = SKIP;
17029 Convert_Action action_for_partial_flag_setting
17030 = (current_tune->disparage_partial_flag_setting_t16_encodings
17031 && optimize_bb_for_speed_p (bb))
17032 ? SKIP : CONV;
17034 COPY_REG_SET (&live, DF_LR_OUT (bb));
17035 df_simulate_initialize_backwards (bb, &live);
17036 FOR_BB_INSNS_REVERSE (bb, insn)
17038 if (NONJUMP_INSN_P (insn)
17039 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17040 && GET_CODE (PATTERN (insn)) == SET)
17042 action = SKIP;
17043 rtx pat = PATTERN (insn);
17044 rtx dst = XEXP (pat, 0);
17045 rtx src = XEXP (pat, 1);
17046 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17048 if (!OBJECT_P (src))
17049 op0 = XEXP (src, 0);
17051 if (BINARY_P (src))
17052 op1 = XEXP (src, 1);
17054 if (low_register_operand (dst, SImode))
17056 switch (GET_CODE (src))
17058 case PLUS:
17059 /* Adding two registers and storing the result
17060 in the first source is already a 16-bit
17061 operation. */
17062 if (rtx_equal_p (dst, op0)
17063 && register_operand (op1, SImode))
17064 break;
17066 if (low_register_operand (op0, SImode))
17068 /* ADDS <Rd>,<Rn>,<Rm> */
17069 if (low_register_operand (op1, SImode))
17070 action = CONV;
17071 /* ADDS <Rdn>,#<imm8> */
17072 /* SUBS <Rdn>,#<imm8> */
17073 else if (rtx_equal_p (dst, op0)
17074 && CONST_INT_P (op1)
17075 && IN_RANGE (INTVAL (op1), -255, 255))
17076 action = CONV;
17077 /* ADDS <Rd>,<Rn>,#<imm3> */
17078 /* SUBS <Rd>,<Rn>,#<imm3> */
17079 else if (CONST_INT_P (op1)
17080 && IN_RANGE (INTVAL (op1), -7, 7))
17081 action = CONV;
17083 /* ADCS <Rd>, <Rn> */
17084 else if (GET_CODE (XEXP (src, 0)) == PLUS
17085 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17086 && low_register_operand (XEXP (XEXP (src, 0), 1),
17087 SImode)
17088 && COMPARISON_P (op1)
17089 && cc_register (XEXP (op1, 0), VOIDmode)
17090 && maybe_get_arm_condition_code (op1) == ARM_CS
17091 && XEXP (op1, 1) == const0_rtx)
17092 action = CONV;
17093 break;
17095 case MINUS:
17096 /* RSBS <Rd>,<Rn>,#0
17097 Not handled here: see NEG below. */
17098 /* SUBS <Rd>,<Rn>,#<imm3>
17099 SUBS <Rdn>,#<imm8>
17100 Not handled here: see PLUS above. */
17101 /* SUBS <Rd>,<Rn>,<Rm> */
17102 if (low_register_operand (op0, SImode)
17103 && low_register_operand (op1, SImode))
17104 action = CONV;
17105 break;
17107 case MULT:
17108 /* MULS <Rdm>,<Rn>,<Rdm>
17109 As an exception to the rule, this is only used
17110 when optimizing for size since MULS is slow on all
17111 known implementations. We do not even want to use
17112 MULS in cold code, if optimizing for speed, so we
17113 test the global flag here. */
17114 if (!optimize_size)
17115 break;
17116 /* else fall through. */
17117 case AND:
17118 case IOR:
17119 case XOR:
17120 /* ANDS <Rdn>,<Rm> */
17121 if (rtx_equal_p (dst, op0)
17122 && low_register_operand (op1, SImode))
17123 action = action_for_partial_flag_setting;
17124 else if (rtx_equal_p (dst, op1)
17125 && low_register_operand (op0, SImode))
17126 action = action_for_partial_flag_setting == SKIP
17127 ? SKIP : SWAP_CONV;
17128 break;
17130 case ASHIFTRT:
17131 case ASHIFT:
17132 case LSHIFTRT:
17133 /* ASRS <Rdn>,<Rm> */
17134 /* LSRS <Rdn>,<Rm> */
17135 /* LSLS <Rdn>,<Rm> */
17136 if (rtx_equal_p (dst, op0)
17137 && low_register_operand (op1, SImode))
17138 action = action_for_partial_flag_setting;
17139 /* ASRS <Rd>,<Rm>,#<imm5> */
17140 /* LSRS <Rd>,<Rm>,#<imm5> */
17141 /* LSLS <Rd>,<Rm>,#<imm5> */
17142 else if (low_register_operand (op0, SImode)
17143 && CONST_INT_P (op1)
17144 && IN_RANGE (INTVAL (op1), 0, 31))
17145 action = action_for_partial_flag_setting;
17146 break;
17148 case ROTATERT:
17149 /* RORS <Rdn>,<Rm> */
17150 if (rtx_equal_p (dst, op0)
17151 && low_register_operand (op1, SImode))
17152 action = action_for_partial_flag_setting;
17153 break;
17155 case NOT:
17156 /* MVNS <Rd>,<Rm> */
17157 if (low_register_operand (op0, SImode))
17158 action = action_for_partial_flag_setting;
17159 break;
17161 case NEG:
17162 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17163 if (low_register_operand (op0, SImode))
17164 action = CONV;
17165 break;
17167 case CONST_INT:
17168 /* MOVS <Rd>,#<imm8> */
17169 if (CONST_INT_P (src)
17170 && IN_RANGE (INTVAL (src), 0, 255))
17171 action = action_for_partial_flag_setting;
17172 break;
17174 case REG:
17175 /* MOVS and MOV<c> with registers have different
17176 encodings, so are not relevant here. */
17177 break;
17179 default:
17180 break;
17184 if (action != SKIP)
17186 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17187 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17188 rtvec vec;
17190 if (action == SWAP_CONV)
17192 src = copy_rtx (src);
17193 XEXP (src, 0) = op1;
17194 XEXP (src, 1) = op0;
17195 pat = gen_rtx_SET (VOIDmode, dst, src);
17196 vec = gen_rtvec (2, pat, clobber);
17198 else /* action == CONV */
17199 vec = gen_rtvec (2, pat, clobber);
17201 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17202 INSN_CODE (insn) = -1;
17206 if (NONDEBUG_INSN_P (insn))
17207 df_simulate_one_insn_backwards (bb, insn, &live);
17211 CLEAR_REG_SET (&live);
17214 /* GCC puts the pool in the wrong place for ARM, since we can only
17215 load addresses a limited distance around the pc. We do some
17216 special munging to move the constant pool values to the correct
17217 point in the code. */
17218 static void
17219 arm_reorg (void)
17221 rtx insn;
17222 HOST_WIDE_INT address = 0;
17223 Mfix * fix;
17225 if (TARGET_THUMB1)
17226 thumb1_reorg ();
17227 else if (TARGET_THUMB2)
17228 thumb2_reorg ();
17230 /* Ensure all insns that must be split have been split at this point.
17231 Otherwise, the pool placement code below may compute incorrect
17232 insn lengths. Note that when optimizing, all insns have already
17233 been split at this point. */
17234 if (!optimize)
17235 split_all_insns_noflow ();
17237 minipool_fix_head = minipool_fix_tail = NULL;
17239 /* The first insn must always be a note, or the code below won't
17240 scan it properly. */
17241 insn = get_insns ();
17242 gcc_assert (NOTE_P (insn));
17243 minipool_pad = 0;
17245 /* Scan all the insns and record the operands that will need fixing. */
17246 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17248 if (BARRIER_P (insn))
17249 push_minipool_barrier (insn, address);
17250 else if (INSN_P (insn))
17252 rtx table;
17254 note_invalid_constants (insn, address, true);
17255 address += get_attr_length (insn);
17257 /* If the insn is a vector jump, add the size of the table
17258 and skip the table. */
17259 if (tablejump_p (insn, NULL, &table))
17261 address += get_jump_table_size (table);
17262 insn = table;
17265 else if (LABEL_P (insn))
17266 /* Add the worst-case padding due to alignment. We don't add
17267 the _current_ padding because the minipool insertions
17268 themselves might change it. */
17269 address += get_label_padding (insn);
17272 fix = minipool_fix_head;
17274 /* Now scan the fixups and perform the required changes. */
17275 while (fix)
17277 Mfix * ftmp;
17278 Mfix * fdel;
17279 Mfix * last_added_fix;
17280 Mfix * last_barrier = NULL;
17281 Mfix * this_fix;
17283 /* Skip any further barriers before the next fix. */
17284 while (fix && BARRIER_P (fix->insn))
17285 fix = fix->next;
17287 /* No more fixes. */
17288 if (fix == NULL)
17289 break;
17291 last_added_fix = NULL;
17293 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17295 if (BARRIER_P (ftmp->insn))
17297 if (ftmp->address >= minipool_vector_head->max_address)
17298 break;
17300 last_barrier = ftmp;
17302 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17303 break;
17305 last_added_fix = ftmp; /* Keep track of the last fix added. */
17308 /* If we found a barrier, drop back to that; any fixes that we
17309 could have reached but come after the barrier will now go in
17310 the next mini-pool. */
17311 if (last_barrier != NULL)
17313 /* Reduce the refcount for those fixes that won't go into this
17314 pool after all. */
17315 for (fdel = last_barrier->next;
17316 fdel && fdel != ftmp;
17317 fdel = fdel->next)
17319 fdel->minipool->refcount--;
17320 fdel->minipool = NULL;
17323 ftmp = last_barrier;
17325 else
17327 /* ftmp is the first fix that we can't fit into this pool and
17328 there are no natural barriers that we could use. Insert a
17329 new barrier in the code somewhere between the previous
17330 fix and this one, and arrange to jump around it. */
17331 HOST_WIDE_INT max_address;
17333 /* The last item on the list of fixes must be a barrier, so
17334 we can never run off the end of the list of fixes without
17335 last_barrier being set. */
17336 gcc_assert (ftmp);
17338 max_address = minipool_vector_head->max_address;
17339 /* Check that there isn't another fix that is in range that
17340 we couldn't fit into this pool because the pool was
17341 already too large: we need to put the pool before such an
17342 instruction. The pool itself may come just after the
17343 fix because create_fix_barrier also allows space for a
17344 jump instruction. */
17345 if (ftmp->address < max_address)
17346 max_address = ftmp->address + 1;
17348 last_barrier = create_fix_barrier (last_added_fix, max_address);
17351 assign_minipool_offsets (last_barrier);
17353 while (ftmp)
17355 if (!BARRIER_P (ftmp->insn)
17356 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17357 == NULL))
17358 break;
17360 ftmp = ftmp->next;
17363 /* Scan over the fixes we have identified for this pool, fixing them
17364 up and adding the constants to the pool itself. */
17365 for (this_fix = fix; this_fix && ftmp != this_fix;
17366 this_fix = this_fix->next)
17367 if (!BARRIER_P (this_fix->insn))
17369 rtx addr
17370 = plus_constant (Pmode,
17371 gen_rtx_LABEL_REF (VOIDmode,
17372 minipool_vector_label),
17373 this_fix->minipool->offset);
17374 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17377 dump_minipool (last_barrier->insn);
17378 fix = ftmp;
17381 /* From now on we must synthesize any constants that we can't handle
17382 directly. This can happen if the RTL gets split during final
17383 instruction generation. */
17384 cfun->machine->after_arm_reorg = 1;
17386 /* Free the minipool memory. */
17387 obstack_free (&minipool_obstack, minipool_startobj);
17390 /* Routines to output assembly language. */
17392 /* If the rtx is the correct value then return the string of the number.
17393 In this way we can ensure that valid double constants are generated even
17394 when cross compiling. */
17395 const char *
17396 fp_immediate_constant (rtx x)
17398 REAL_VALUE_TYPE r;
17400 if (!fp_consts_inited)
17401 init_fp_table ();
17403 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17405 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17406 return "0";
17409 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17410 static const char *
17411 fp_const_from_val (REAL_VALUE_TYPE *r)
17413 if (!fp_consts_inited)
17414 init_fp_table ();
17416 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17417 return "0";
17420 /* OPERANDS[0] is the entire list of insns that constitute pop,
17421 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17422 is in the list, UPDATE is true iff the list contains explicit
17423 update of base register. */
17424 void
17425 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17426 bool update)
17428 int i;
17429 char pattern[100];
17430 int offset;
17431 const char *conditional;
17432 int num_saves = XVECLEN (operands[0], 0);
17433 unsigned int regno;
17434 unsigned int regno_base = REGNO (operands[1]);
17436 offset = 0;
17437 offset += update ? 1 : 0;
17438 offset += return_pc ? 1 : 0;
17440 /* Is the base register in the list? */
17441 for (i = offset; i < num_saves; i++)
17443 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17444 /* If SP is in the list, then the base register must be SP. */
17445 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17446 /* If base register is in the list, there must be no explicit update. */
17447 if (regno == regno_base)
17448 gcc_assert (!update);
17451 conditional = reverse ? "%?%D0" : "%?%d0";
17452 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17454 /* Output pop (not ldmfd) because it has a shorter encoding. */
17455 gcc_assert (update);
17456 sprintf (pattern, "pop%s\t{", conditional);
17458 else
17460 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17461 It's just a convention; their semantics are identical. */
17462 if (regno_base == SP_REGNUM)
17463 sprintf (pattern, "ldm%sfd\t", conditional);
17464 else if (TARGET_UNIFIED_ASM)
17465 sprintf (pattern, "ldmia%s\t", conditional);
17466 else
17467 sprintf (pattern, "ldm%sia\t", conditional);
17469 strcat (pattern, reg_names[regno_base]);
17470 if (update)
17471 strcat (pattern, "!, {");
17472 else
17473 strcat (pattern, ", {");
17476 /* Output the first destination register. */
17477 strcat (pattern,
17478 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17480 /* Output the rest of the destination registers. */
17481 for (i = offset + 1; i < num_saves; i++)
17483 strcat (pattern, ", ");
17484 strcat (pattern,
17485 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17488 strcat (pattern, "}");
17490 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17491 strcat (pattern, "^");
17493 output_asm_insn (pattern, &cond);
17497 /* Output the assembly for a store multiple. */
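/* A sketch of the expansion (assumed operands): for a push of three
   double registers starting at d8, the template built below is
   "fstmfdd%?\t%m0!, {%P1, d9, d10}", which prints as something like
   "fstmfdd sp!, {d8, d9, d10}".  */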
17499 const char *
17500 vfp_output_fstmd (rtx * operands)
17502 char pattern[100];
17503 int p;
17504 int base;
17505 int i;
17507 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17508 p = strlen (pattern);
17510 gcc_assert (REG_P (operands[1]));
17512 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17513 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17515 p += sprintf (&pattern[p], ", d%d", base + i);
17517 strcpy (&pattern[p], "}");
17519 output_asm_insn (pattern, operands);
17520 return "";
17524 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17525 number of bytes pushed. */
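/* Worked example (a sketch): with BASE_REG naming d8 and COUNT == 3 this
   emits a single PARALLEL that pre-decrements SP by 24 bytes and stores
   d8-d10, then returns 24.  Note that the ARM10 VFPr1 workaround below
   can silently widen an exactly-two-register push on pre-ARMv6 cores.  */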
17527 static int
17528 vfp_emit_fstmd (int base_reg, int count)
17530 rtx par;
17531 rtx dwarf;
17532 rtx tmp, reg;
17533 int i;
17535 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17536 register pairs are stored by a store multiple insn. We avoid this
17537 by pushing an extra pair. */
17538 if (count == 2 && !arm_arch6)
17540 if (base_reg == LAST_VFP_REGNUM - 3)
17541 base_reg -= 2;
17542 count++;
17545 /* FSTMD may not store more than 16 doubleword registers at once. Split
17546 larger stores into multiple parts (up to a maximum of two, in
17547 practice). */
17548 if (count > 16)
17550 int saved;
17551 /* NOTE: base_reg is an internal register number, so each D register
17552 counts as 2. */
17553 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17554 saved += vfp_emit_fstmd (base_reg, 16);
17555 return saved;
17558 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17559 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17561 reg = gen_rtx_REG (DFmode, base_reg);
17562 base_reg += 2;
17564 XVECEXP (par, 0, 0)
17565 = gen_rtx_SET (VOIDmode,
17566 gen_frame_mem
17567 (BLKmode,
17568 gen_rtx_PRE_MODIFY (Pmode,
17569 stack_pointer_rtx,
17570 plus_constant
17571 (Pmode, stack_pointer_rtx,
17572 - (count * 8)))
17574 gen_rtx_UNSPEC (BLKmode,
17575 gen_rtvec (1, reg),
17576 UNSPEC_PUSH_MULT));
17578 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17579 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17580 RTX_FRAME_RELATED_P (tmp) = 1;
17581 XVECEXP (dwarf, 0, 0) = tmp;
17583 tmp = gen_rtx_SET (VOIDmode,
17584 gen_frame_mem (DFmode, stack_pointer_rtx),
17585 reg);
17586 RTX_FRAME_RELATED_P (tmp) = 1;
17587 XVECEXP (dwarf, 0, 1) = tmp;
17589 for (i = 1; i < count; i++)
17591 reg = gen_rtx_REG (DFmode, base_reg);
17592 base_reg += 2;
17593 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17595 tmp = gen_rtx_SET (VOIDmode,
17596 gen_frame_mem (DFmode,
17597 plus_constant (Pmode,
17598 stack_pointer_rtx,
17599 i * 8)),
17600 reg);
17601 RTX_FRAME_RELATED_P (tmp) = 1;
17602 XVECEXP (dwarf, 0, i + 1) = tmp;
17605 par = emit_insn (par);
17606 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17607 RTX_FRAME_RELATED_P (par) = 1;
17609 return count * 8;
17612 /* Emit a call instruction with pattern PAT. ADDR is the address of
17613 the call target. */
17615 void
17616 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17618 rtx insn;
17620 insn = emit_call_insn (pat);
17622 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17623 If the call might use such an entry, add a use of the PIC register
17624 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17625 if (TARGET_VXWORKS_RTP
17626 && flag_pic
17627 && !sibcall
17628 && GET_CODE (addr) == SYMBOL_REF
17629 && (SYMBOL_REF_DECL (addr)
17630 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17631 : !SYMBOL_REF_LOCAL_P (addr)))
17633 require_pic_register ();
17634 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17637 if (TARGET_AAPCS_BASED)
17639 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17640 linker. We need to add an IP clobber to allow setting
17641 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17642 is not needed since it's a fixed register. */
17643 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17644 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17648 /* Output a 'call' insn. */
17649 const char *
17650 output_call (rtx *operands)
17652 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17654 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17655 if (REGNO (operands[0]) == LR_REGNUM)
17657 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17658 output_asm_insn ("mov%?\t%0, %|lr", operands);
17661 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17663 if (TARGET_INTERWORK || arm_arch4t)
17664 output_asm_insn ("bx%?\t%0", operands);
17665 else
17666 output_asm_insn ("mov%?\t%|pc, %0", operands);
17668 return "";
17671 /* Output a 'call' insn that is a reference in memory. This is
17672 disabled for ARMv5 and we prefer a blx instead because otherwise
17673 there's a significant performance overhead. */
17674 const char *
17675 output_call_mem (rtx *operands)
17677 gcc_assert (!arm_arch5);
17678 if (TARGET_INTERWORK)
17680 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17681 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17682 output_asm_insn ("bx%?\t%|ip", operands);
17684 else if (regno_use_in (LR_REGNUM, operands[0]))
17686 /* LR is used in the memory address. We load the address in the
17687 first instruction. It's safe to use IP as the target of the
17688 load since the call will kill it anyway. */
17689 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17690 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17691 if (arm_arch4t)
17692 output_asm_insn ("bx%?\t%|ip", operands);
17693 else
17694 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17696 else
17698 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17699 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17702 return "";
17706 /* Output a move from arm registers to arm registers of a long double.
17707 OPERANDS[0] is the destination.
17708 OPERANDS[1] is the source. */
17709 const char *
17710 output_mov_long_double_arm_from_arm (rtx *operands)
17712 /* We have to be careful here because the two might overlap. */
17713 int dest_start = REGNO (operands[0]);
17714 int src_start = REGNO (operands[1]);
17715 rtx ops[2];
17716 int i;
17718 if (dest_start < src_start)
17720 for (i = 0; i < 3; i++)
17722 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17723 ops[1] = gen_rtx_REG (SImode, src_start + i);
17724 output_asm_insn ("mov%?\t%0, %1", ops);
17727 else
17729 for (i = 2; i >= 0; i--)
17731 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17732 ops[1] = gen_rtx_REG (SImode, src_start + i);
17733 output_asm_insn ("mov%?\t%0, %1", ops);
17737 return "";
17740 void
17741 arm_emit_movpair (rtx dest, rtx src)
17743 /* If the src is an immediate, simplify it. */
17744 if (CONST_INT_P (src))
17746 HOST_WIDE_INT val = INTVAL (src);
17747 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17748 if ((val >> 16) & 0x0000ffff)
17749 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17750 GEN_INT (16)),
17751 GEN_INT ((val >> 16) & 0x0000ffff));
17752 return;
17754 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17755 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
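/* Illustrative example for arm_emit_movpair (assumed value): for
   SRC = 0x12345678 it first sets DEST to 0x5678 and then, because the
   high half is nonzero, writes 0x1234 into the top 16 bits via a
   ZERO_EXTRACT (a movw/movt style pair); for a symbolic SRC it uses
   the HIGH/LO_SUM sequence instead.  */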
17758 /* Output a move between double words. It must be REG<-MEM
17759 or MEM<-REG. */
17760 const char *
17761 output_move_double (rtx *operands, bool emit, int *count)
17763 enum rtx_code code0 = GET_CODE (operands[0]);
17764 enum rtx_code code1 = GET_CODE (operands[1]);
17765 rtx otherops[3];
17766 if (count)
17767 *count = 1;
17769 /* The only case when this might happen is when
17770 you are looking at the length of a DImode instruction
17771 that has an invalid constant in it. */
17772 if (code0 == REG && code1 != MEM)
17774 gcc_assert (!emit);
17775 *count = 2;
17776 return "";
17779 if (code0 == REG)
17781 unsigned int reg0 = REGNO (operands[0]);
17783 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17785 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17787 switch (GET_CODE (XEXP (operands[1], 0)))
17789 case REG:
17791 if (emit)
17793 if (TARGET_LDRD
17794 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17795 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17796 else
17797 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17799 break;
17801 case PRE_INC:
17802 gcc_assert (TARGET_LDRD);
17803 if (emit)
17804 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17805 break;
17807 case PRE_DEC:
17808 if (emit)
17810 if (TARGET_LDRD)
17811 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17812 else
17813 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17815 break;
17817 case POST_INC:
17818 if (emit)
17820 if (TARGET_LDRD)
17821 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17822 else
17823 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17825 break;
17827 case POST_DEC:
17828 gcc_assert (TARGET_LDRD);
17829 if (emit)
17830 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17831 break;
17833 case PRE_MODIFY:
17834 case POST_MODIFY:
17835 /* Autoincrement addressing modes should never have overlapping
17836 base and destination registers, and overlapping index registers
17837 are already prohibited, so this doesn't need to worry about
17838 fix_cm3_ldrd. */
17839 otherops[0] = operands[0];
17840 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17841 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17843 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17845 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17847 /* Registers overlap so split out the increment. */
17848 if (emit)
17850 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17851 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17853 if (count)
17854 *count = 2;
17856 else
17858 /* Use a single insn if we can.
17859 FIXME: IWMMXT allows offsets larger than ldrd can
17860 handle, fix these up with a pair of ldr. */
17861 if (TARGET_THUMB2
17862 || !CONST_INT_P (otherops[2])
17863 || (INTVAL (otherops[2]) > -256
17864 && INTVAL (otherops[2]) < 256))
17866 if (emit)
17867 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17869 else
17871 if (emit)
17873 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17874 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17876 if (count)
17877 *count = 2;
17882 else
17884 /* Use a single insn if we can.
17885 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17886 fix these up with a pair of ldr. */
17887 if (TARGET_THUMB2
17888 || !CONST_INT_P (otherops[2])
17889 || (INTVAL (otherops[2]) > -256
17890 && INTVAL (otherops[2]) < 256))
17892 if (emit)
17893 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17895 else
17897 if (emit)
17899 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17900 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17902 if (count)
17903 *count = 2;
17906 break;
17908 case LABEL_REF:
17909 case CONST:
17910 /* We might be able to use ldrd %0, %1 here. However the range is
17911 different to ldr/adr, and it is broken on some ARMv7-M
17912 implementations. */
17913 /* Use the second register of the pair to avoid problematic
17914 overlap. */
17915 otherops[1] = operands[1];
17916 if (emit)
17917 output_asm_insn ("adr%?\t%0, %1", otherops);
17918 operands[1] = otherops[0];
17919 if (emit)
17921 if (TARGET_LDRD)
17922 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17923 else
17924 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17927 if (count)
17928 *count = 2;
17929 break;
17931 /* ??? This needs checking for thumb2. */
17932 default:
17933 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17934 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17936 otherops[0] = operands[0];
17937 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17938 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17940 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17942 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17944 switch ((int) INTVAL (otherops[2]))
17946 case -8:
17947 if (emit)
17948 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17949 return "";
17950 case -4:
17951 if (TARGET_THUMB2)
17952 break;
17953 if (emit)
17954 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17955 return "";
17956 case 4:
17957 if (TARGET_THUMB2)
17958 break;
17959 if (emit)
17960 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17961 return "";
17964 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17965 operands[1] = otherops[0];
17966 if (TARGET_LDRD
17967 && (REG_P (otherops[2])
17968 || TARGET_THUMB2
17969 || (CONST_INT_P (otherops[2])
17970 && INTVAL (otherops[2]) > -256
17971 && INTVAL (otherops[2]) < 256)))
17973 if (reg_overlap_mentioned_p (operands[0],
17974 otherops[2]))
17976 rtx tmp;
17977 /* Swap base and index registers over to
17978 avoid a conflict. */
17979 tmp = otherops[1];
17980 otherops[1] = otherops[2];
17981 otherops[2] = tmp;
17983 /* If both registers conflict, it will usually
17984 have been fixed by a splitter. */
17985 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17986 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17988 if (emit)
17990 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17991 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17993 if (count)
17994 *count = 2;
17996 else
17998 otherops[0] = operands[0];
17999 if (emit)
18000 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18002 return "";
18005 if (CONST_INT_P (otherops[2]))
18007 if (emit)
18009 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18010 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18011 else
18012 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18015 else
18017 if (emit)
18018 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18021 else
18023 if (emit)
18024 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18027 if (count)
18028 *count = 2;
18030 if (TARGET_LDRD)
18031 return "ldr%(d%)\t%0, [%1]";
18033 return "ldm%(ia%)\t%1, %M0";
18035 else
18037 otherops[1] = adjust_address (operands[1], SImode, 4);
18038 /* Take care of overlapping base/data reg. */
18039 if (reg_mentioned_p (operands[0], operands[1]))
18041 if (emit)
18043 output_asm_insn ("ldr%?\t%0, %1", otherops);
18044 output_asm_insn ("ldr%?\t%0, %1", operands);
18046 if (count)
18047 *count = 2;
18050 else
18052 if (emit)
18054 output_asm_insn ("ldr%?\t%0, %1", operands);
18055 output_asm_insn ("ldr%?\t%0, %1", otherops);
18057 if (count)
18058 *count = 2;
18063 else
18065 /* Constraints should ensure this. */
18066 gcc_assert (code0 == MEM && code1 == REG);
18067 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18068 || (TARGET_ARM && TARGET_LDRD));
18070 switch (GET_CODE (XEXP (operands[0], 0)))
18072 case REG:
18073 if (emit)
18075 if (TARGET_LDRD)
18076 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18077 else
18078 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18080 break;
18082 case PRE_INC:
18083 gcc_assert (TARGET_LDRD);
18084 if (emit)
18085 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18086 break;
18088 case PRE_DEC:
18089 if (emit)
18091 if (TARGET_LDRD)
18092 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18093 else
18094 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18096 break;
18098 case POST_INC:
18099 if (emit)
18101 if (TARGET_LDRD)
18102 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18103 else
18104 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18106 break;
18108 case POST_DEC:
18109 gcc_assert (TARGET_LDRD);
18110 if (emit)
18111 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18112 break;
18114 case PRE_MODIFY:
18115 case POST_MODIFY:
18116 otherops[0] = operands[1];
18117 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18118 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18120 /* IWMMXT allows offsets larger than ldrd can handle,
18121 fix these up with a pair of ldr. */
18122 if (!TARGET_THUMB2
18123 && CONST_INT_P (otherops[2])
18124 && (INTVAL(otherops[2]) <= -256
18125 || INTVAL(otherops[2]) >= 256))
18127 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18129 if (emit)
18131 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18132 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18134 if (count)
18135 *count = 2;
18137 else
18139 if (emit)
18141 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18142 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18144 if (count)
18145 *count = 2;
18148 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18150 if (emit)
18151 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18153 else
18155 if (emit)
18156 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18158 break;
18160 case PLUS:
18161 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18162 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18164 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18166 case -8:
18167 if (emit)
18168 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18169 return "";
18171 case -4:
18172 if (TARGET_THUMB2)
18173 break;
18174 if (emit)
18175 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18176 return "";
18178 case 4:
18179 if (TARGET_THUMB2)
18180 break;
18181 if (emit)
18182 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18183 return "";
18186 if (TARGET_LDRD
18187 && (REG_P (otherops[2])
18188 || TARGET_THUMB2
18189 || (CONST_INT_P (otherops[2])
18190 && INTVAL (otherops[2]) > -256
18191 && INTVAL (otherops[2]) < 256)))
18193 otherops[0] = operands[1];
18194 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18195 if (emit)
18196 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18197 return "";
18199 /* Fall through */
18201 default:
18202 otherops[0] = adjust_address (operands[0], SImode, 4);
18203 otherops[1] = operands[1];
18204 if (emit)
18206 output_asm_insn ("str%?\t%1, %0", operands);
18207 output_asm_insn ("str%?\t%H1, %0", otherops);
18209 if (count)
18210 *count = 2;
18214 return "";
18217 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18218 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18220 const char *
18221 output_move_quad (rtx *operands)
18223 if (REG_P (operands[0]))
18225 /* Load, or reg->reg move. */
18227 if (MEM_P (operands[1]))
18229 switch (GET_CODE (XEXP (operands[1], 0)))
18231 case REG:
18232 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18233 break;
18235 case LABEL_REF:
18236 case CONST:
18237 output_asm_insn ("adr%?\t%0, %1", operands);
18238 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18239 break;
18241 default:
18242 gcc_unreachable ();
18245 else
18247 rtx ops[2];
18248 int dest, src, i;
18250 gcc_assert (REG_P (operands[1]));
18252 dest = REGNO (operands[0]);
18253 src = REGNO (operands[1]);
18255 /* This seems pretty dumb, but hopefully GCC won't try to do it
18256 very often. */
18257 if (dest < src)
18258 for (i = 0; i < 4; i++)
18260 ops[0] = gen_rtx_REG (SImode, dest + i);
18261 ops[1] = gen_rtx_REG (SImode, src + i);
18262 output_asm_insn ("mov%?\t%0, %1", ops);
18264 else
18265 for (i = 3; i >= 0; i--)
18267 ops[0] = gen_rtx_REG (SImode, dest + i);
18268 ops[1] = gen_rtx_REG (SImode, src + i);
18269 output_asm_insn ("mov%?\t%0, %1", ops);
18273 else
18275 gcc_assert (MEM_P (operands[0]));
18276 gcc_assert (REG_P (operands[1]));
18277 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18279 switch (GET_CODE (XEXP (operands[0], 0)))
18281 case REG:
18282 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18283 break;
18285 default:
18286 gcc_unreachable ();
18290 return "";
18293 /* Output a VFP load or store instruction. */
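/* A sketch of typical output (assumed operands): a DFmode load from a
   plain [reg, #offset] address goes through the default template below
   and prints as something like "fldd d8, [r0, #8]", while a POST_INC
   store of an SFmode value becomes roughly "fstmias r0!, {s1}".  */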
18295 const char *
18296 output_move_vfp (rtx *operands)
18298 rtx reg, mem, addr, ops[2];
18299 int load = REG_P (operands[0]);
18300 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18301 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18302 const char *templ;
18303 char buff[50];
18304 enum machine_mode mode;
18306 reg = operands[!load];
18307 mem = operands[load];
18309 mode = GET_MODE (reg);
18311 gcc_assert (REG_P (reg));
18312 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18313 gcc_assert (mode == SFmode
18314 || mode == DFmode
18315 || mode == SImode
18316 || mode == DImode
18317 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18318 gcc_assert (MEM_P (mem));
18320 addr = XEXP (mem, 0);
18322 switch (GET_CODE (addr))
18324 case PRE_DEC:
18325 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18326 ops[0] = XEXP (addr, 0);
18327 ops[1] = reg;
18328 break;
18330 case POST_INC:
18331 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18332 ops[0] = XEXP (addr, 0);
18333 ops[1] = reg;
18334 break;
18336 default:
18337 templ = "f%s%c%%?\t%%%s0, %%1%s";
18338 ops[0] = reg;
18339 ops[1] = mem;
18340 break;
18343 sprintf (buff, templ,
18344 load ? "ld" : "st",
18345 dp ? 'd' : 's',
18346 dp ? "P" : "",
18347 integer_p ? "\t%@ int" : "");
18348 output_asm_insn (buff, ops);
18350 return "";
18353 /* Output a Neon double-word or quad-word load or store, or a load
18354 or store for larger structure modes.
18356 WARNING: The ordering of elements is weird in big-endian mode,
18357 because the EABI requires that vectors stored in memory appear
18358 as though they were stored by a VSTM instruction.
18359 GCC RTL defines element ordering based on in-memory order.
18360 This can be different from the architectural ordering of elements
18361 within a NEON register. The intrinsics defined in arm_neon.h use the
18362 NEON register element ordering, not the GCC RTL element ordering.
18364 For example, the in-memory ordering of a big-endian quadword
18365 vector with 16-bit elements when stored from register pair {d0,d1}
18366 will be (lowest address first, d0[N] is NEON register element N):
18368 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18370 When necessary, quadword registers (dN, dN+1) are moved to ARM
18371 registers from rN in the order:
18373 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18375 So that STM/LDM can be used on vectors in ARM registers, and the
18376 same memory layout will result as if VSTM/VLDM were used.
18378 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18379 possible, which allows use of appropriate alignment tags.
18380 Note that the choice of "64" is independent of the actual vector
18381 element size; this size simply ensures that the behavior is
18382 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18384 Due to limitations of those instructions, use of VST1.64/VLD1.64
18385 is not possible if:
18386 - the address contains PRE_DEC, or
18387 - the mode refers to more than 4 double-word registers
18389 In those cases, it would be possible to replace VSTM/VLDM by a
18390 sequence of instructions; this is not currently implemented since
18391 this is not certain to actually improve performance. */
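/* A sketch of typical output (assumed operands): a two-register quad-word
   store through a plain register address uses the default case below and
   prints as something like "vst1.64 {d0, d1}, [r0]", while a PRE_DEC
   access is forced into the vstm form, e.g. "vstmdb r0!, {d0, d1}".  */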
18393 const char *
18394 output_move_neon (rtx *operands)
18396 rtx reg, mem, addr, ops[2];
18397 int regno, nregs, load = REG_P (operands[0]);
18398 const char *templ;
18399 char buff[50];
18400 enum machine_mode mode;
18402 reg = operands[!load];
18403 mem = operands[load];
18405 mode = GET_MODE (reg);
18407 gcc_assert (REG_P (reg));
18408 regno = REGNO (reg);
18409 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18410 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18411 || NEON_REGNO_OK_FOR_QUAD (regno));
18412 gcc_assert (VALID_NEON_DREG_MODE (mode)
18413 || VALID_NEON_QREG_MODE (mode)
18414 || VALID_NEON_STRUCT_MODE (mode));
18415 gcc_assert (MEM_P (mem));
18417 addr = XEXP (mem, 0);
18419 /* Strip off const from addresses like (const (plus (...))). */
18420 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18421 addr = XEXP (addr, 0);
18423 switch (GET_CODE (addr))
18425 case POST_INC:
18426 /* We have to use vldm / vstm for too-large modes. */
18427 if (nregs > 4)
18429 templ = "v%smia%%?\t%%0!, %%h1";
18430 ops[0] = XEXP (addr, 0);
18432 else
18434 templ = "v%s1.64\t%%h1, %%A0";
18435 ops[0] = mem;
18437 ops[1] = reg;
18438 break;
18440 case PRE_DEC:
18441 /* We have to use vldm / vstm in this case, since there is no
18442 pre-decrement form of the vld1 / vst1 instructions. */
18443 templ = "v%smdb%%?\t%%0!, %%h1";
18444 ops[0] = XEXP (addr, 0);
18445 ops[1] = reg;
18446 break;
18448 case POST_MODIFY:
18449 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18450 gcc_unreachable ();
18452 case LABEL_REF:
18453 case PLUS:
18455 int i;
18456 int overlap = -1;
18457 for (i = 0; i < nregs; i++)
18459 /* We're only using DImode here because it's a convenient size. */
18460 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18461 ops[1] = adjust_address (mem, DImode, 8 * i);
18462 if (reg_overlap_mentioned_p (ops[0], mem))
18464 gcc_assert (overlap == -1);
18465 overlap = i;
18467 else
18469 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18470 output_asm_insn (buff, ops);
18473 if (overlap != -1)
18475 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18476 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18477 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18478 output_asm_insn (buff, ops);
18481 return "";
18484 default:
18485 /* We have to use vldm / vstm for too-large modes. */
18486 if (nregs > 4)
18487 templ = "v%smia%%?\t%%m0, %%h1";
18488 else
18489 templ = "v%s1.64\t%%h1, %%A0";
18491 ops[0] = mem;
18492 ops[1] = reg;
18495 sprintf (buff, templ, load ? "ld" : "st");
18496 output_asm_insn (buff, ops);
18498 return "";
18501 /* Compute and return the length of neon_mov<mode>, where <mode> is
18502 one of VSTRUCT modes: EI, OI, CI or XI. */
18503 int
18504 arm_attr_length_move_neon (rtx insn)
18506 rtx reg, mem, addr;
18507 int load;
18508 enum machine_mode mode;
18510 extract_insn_cached (insn);
18512 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18514 mode = GET_MODE (recog_data.operand[0]);
18515 switch (mode)
18517 case EImode:
18518 case OImode:
18519 return 8;
18520 case CImode:
18521 return 12;
18522 case XImode:
18523 return 16;
18524 default:
18525 gcc_unreachable ();
18529 load = REG_P (recog_data.operand[0]);
18530 reg = recog_data.operand[!load];
18531 mem = recog_data.operand[load];
18533 gcc_assert (MEM_P (mem));
18535 mode = GET_MODE (reg);
18536 addr = XEXP (mem, 0);
18538 /* Strip off const from addresses like (const (plus (...))). */
18539 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18540 addr = XEXP (addr, 0);
18542 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18544 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18545 return insns * 4;
18547 else
18548 return 4;
18551 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18552 return zero. */
18554 int
18555 arm_address_offset_is_imm (rtx insn)
18557 rtx mem, addr;
18559 extract_insn_cached (insn);
18561 if (REG_P (recog_data.operand[0]))
18562 return 0;
18564 mem = recog_data.operand[0];
18566 gcc_assert (MEM_P (mem));
18568 addr = XEXP (mem, 0);
18570 if (REG_P (addr)
18571 || (GET_CODE (addr) == PLUS
18572 && REG_P (XEXP (addr, 0))
18573 && CONST_INT_P (XEXP (addr, 1))))
18574 return 1;
18575 else
18576 return 0;
18579 /* Output an ADD r, s, #n where n may be too big for one instruction.
18580 If adding zero to one register, output nothing. */
18581 const char *
18582 output_add_immediate (rtx *operands)
18584 HOST_WIDE_INT n = INTVAL (operands[2]);
18586 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18588 if (n < 0)
18589 output_multi_immediate (operands,
18590 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18591 -n);
18592 else
18593 output_multi_immediate (operands,
18594 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18598 return "";
18601 /* Output a multiple immediate operation.
18602 OPERANDS is the vector of operands referred to in the output patterns.
18603 INSTR1 is the output pattern to use for the first constant.
18604 INSTR2 is the output pattern to use for subsequent constants.
18605 IMMED_OP is the index of the constant slot in OPERANDS.
18606 N is the constant value. */
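/* Worked example (a sketch): called from output_add_immediate above with
   N = 0x12034, the loop below peels off 8-bit chunks at even bit
   positions and emits roughly "add r0, r1, #52" (0x34) followed by
   "add r0, r0, #73728" (0x12000), each of which is a valid ARM
   immediate.  */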
18607 static const char *
18608 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18609 int immed_op, HOST_WIDE_INT n)
18611 #if HOST_BITS_PER_WIDE_INT > 32
18612 n &= 0xffffffff;
18613 #endif
18615 if (n == 0)
18617 /* Quick and easy output. */
18618 operands[immed_op] = const0_rtx;
18619 output_asm_insn (instr1, operands);
18621 else
18623 int i;
18624 const char * instr = instr1;
18626 /* Note that n is never zero here (which would give no output). */
18627 for (i = 0; i < 32; i += 2)
18629 if (n & (3 << i))
18631 operands[immed_op] = GEN_INT (n & (255 << i));
18632 output_asm_insn (instr, operands);
18633 instr = instr2;
18634 i += 6;
18639 return "";
18642 /* Return the name of a shifter operation. */
18643 static const char *
18644 arm_shift_nmem(enum rtx_code code)
18646 switch (code)
18648 case ASHIFT:
18649 return ARM_LSL_NAME;
18651 case ASHIFTRT:
18652 return "asr";
18654 case LSHIFTRT:
18655 return "lsr";
18657 case ROTATERT:
18658 return "ror";
18660 default:
18661 abort();
18665 /* Return the appropriate ARM instruction for the operation code.
18666 The returned result should not be overwritten. OP is the rtx of the
18667 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18668 was shifted. */
18669 const char *
18670 arithmetic_instr (rtx op, int shift_first_arg)
18672 switch (GET_CODE (op))
18674 case PLUS:
18675 return "add";
18677 case MINUS:
18678 return shift_first_arg ? "rsb" : "sub";
18680 case IOR:
18681 return "orr";
18683 case XOR:
18684 return "eor";
18686 case AND:
18687 return "and";
18689 case ASHIFT:
18690 case ASHIFTRT:
18691 case LSHIFTRT:
18692 case ROTATERT:
18693 return arm_shift_nmem(GET_CODE(op));
18695 default:
18696 gcc_unreachable ();
18700 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18701 for the operation code. The returned result should not be overwritten.
18702 OP is the rtx of the shift.
18703 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18704 constant shift amount otherwise. */
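/* Illustrative cases (a sketch): a (mult x 8) operand is rewritten as a
   shift, returning "lsl" with *AMOUNTP == 3; a (rotate x 10) becomes
   "ror" with *AMOUNTP == 22 (i.e. 32 - 10); a shift by a register
   returns its mnemonic with *AMOUNTP == -1.  */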
18705 static const char *
18706 shift_op (rtx op, HOST_WIDE_INT *amountp)
18708 const char * mnem;
18709 enum rtx_code code = GET_CODE (op);
18711 switch (code)
18713 case ROTATE:
18714 if (!CONST_INT_P (XEXP (op, 1)))
18716 output_operand_lossage ("invalid shift operand");
18717 return NULL;
18720 code = ROTATERT;
18721 *amountp = 32 - INTVAL (XEXP (op, 1));
18722 mnem = "ror";
18723 break;
18725 case ASHIFT:
18726 case ASHIFTRT:
18727 case LSHIFTRT:
18728 case ROTATERT:
18729 mnem = arm_shift_nmem(code);
18730 if (CONST_INT_P (XEXP (op, 1)))
18732 *amountp = INTVAL (XEXP (op, 1));
18734 else if (REG_P (XEXP (op, 1)))
18736 *amountp = -1;
18737 return mnem;
18739 else
18741 output_operand_lossage ("invalid shift operand");
18742 return NULL;
18744 break;
18746 case MULT:
18747 /* We never have to worry about the amount being other than a
18748 power of 2, since this case can never be reloaded from a reg. */
18749 if (!CONST_INT_P (XEXP (op, 1)))
18751 output_operand_lossage ("invalid shift operand");
18752 return NULL;
18755 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18757 /* Amount must be a power of two. */
18758 if (*amountp & (*amountp - 1))
18760 output_operand_lossage ("invalid shift operand");
18761 return NULL;
18764 *amountp = int_log2 (*amountp);
18765 return ARM_LSL_NAME;
18767 default:
18768 output_operand_lossage ("invalid shift operand");
18769 return NULL;
18772 /* This is not 100% correct, but follows from the desire to merge
18773 multiplication by a power of 2 with the recognizer for a
18774 shift. >=32 is not a valid shift for "lsl", so we must try and
18775 output a shift that produces the correct arithmetical result.
18776 Using lsr #32 is identical except for the fact that the carry bit
18777 is not set correctly if we set the flags; but we never use the
18778 carry bit from such an operation, so we can ignore that. */
18779 if (code == ROTATERT)
18780 /* Rotate is just modulo 32. */
18781 *amountp &= 31;
18782 else if (*amountp != (*amountp & 31))
18784 if (code == ASHIFT)
18785 mnem = "lsr";
18786 *amountp = 32;
18789 /* Shifts of 0 are no-ops. */
18790 if (*amountp == 0)
18791 return NULL;
18793 return mnem;
18796 /* Obtain the shift from the POWER of two. */
18798 static HOST_WIDE_INT
18799 int_log2 (HOST_WIDE_INT power)
18801 HOST_WIDE_INT shift = 0;
18803 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18805 gcc_assert (shift <= 31);
18806 shift++;
18809 return shift;
18812 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18813 because /bin/as is horribly restrictive. The judgement about
18814 whether or not each character is 'printable' (and can be output as
18815 is) or not (and must be printed with an octal escape) must be made
18816 with reference to the *host* character set -- the situation is
18817 similar to that discussed in the comments above pp_c_char in
18818 c-pretty-print.c. */
18820 #define MAX_ASCII_LEN 51
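/* For example (a sketch): a 120-character string is split across several
   ".ascii" directives, starting a new directive once roughly
   MAX_ASCII_LEN characters have been emitted; unprintable bytes are
   written as octal escapes, e.g. a tab character becomes "\011".  */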
18822 void
18823 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18825 int i;
18826 int len_so_far = 0;
18828 fputs ("\t.ascii\t\"", stream);
18830 for (i = 0; i < len; i++)
18832 int c = p[i];
18834 if (len_so_far >= MAX_ASCII_LEN)
18836 fputs ("\"\n\t.ascii\t\"", stream);
18837 len_so_far = 0;
18840 if (ISPRINT (c))
18842 if (c == '\\' || c == '\"')
18844 putc ('\\', stream);
18845 len_so_far++;
18847 putc (c, stream);
18848 len_so_far++;
18850 else
18852 fprintf (stream, "\\%03o", c);
18853 len_so_far += 4;
18857 fputs ("\"\n", stream);
18860 /* Compute the register save mask for registers 0 through 12
18861 inclusive. This code is used by arm_compute_save_reg_mask. */
18863 static unsigned long
18864 arm_compute_save_reg0_reg12_mask (void)
18866 unsigned long func_type = arm_current_func_type ();
18867 unsigned long save_reg_mask = 0;
18868 unsigned int reg;
18870 if (IS_INTERRUPT (func_type))
18872 unsigned int max_reg;
18873 /* Interrupt functions must not corrupt any registers,
18874 even call clobbered ones. If this is a leaf function
18875 we can just examine the registers used by the RTL, but
18876 otherwise we have to assume that whatever function is
18877 called might clobber anything, and so we have to save
18878 all the call-clobbered registers as well. */
18879 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18880 /* FIQ handlers have registers r8 - r12 banked, so
18881 we only need to check r0 - r7. Normal ISRs only
18882 bank r14 and r15, so we must check up to r12.
18883 r13 is the stack pointer which is always preserved,
18884 so we do not need to consider it here. */
18885 max_reg = 7;
18886 else
18887 max_reg = 12;
18889 for (reg = 0; reg <= max_reg; reg++)
18890 if (df_regs_ever_live_p (reg)
18891 || (! crtl->is_leaf && call_used_regs[reg]))
18892 save_reg_mask |= (1 << reg);
18894 /* Also save the pic base register if necessary. */
18895 if (flag_pic
18896 && !TARGET_SINGLE_PIC_BASE
18897 && arm_pic_register != INVALID_REGNUM
18898 && crtl->uses_pic_offset_table)
18899 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18901 else if (IS_VOLATILE(func_type))
18903 /* For noreturn functions we historically omitted register saves
18904 altogether. However this really messes up debugging. As a
18905 compromise save just the frame pointers. Combined with the link
18906 register saved elsewhere this should be sufficient to get
18907 a backtrace. */
18908 if (frame_pointer_needed)
18909 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18910 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18911 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18912 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18913 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18915 else
18917 /* In the normal case we only need to save those registers
18918 which are call saved and which are used by this function. */
18919 for (reg = 0; reg <= 11; reg++)
18920 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18921 save_reg_mask |= (1 << reg);
18923 /* Handle the frame pointer as a special case. */
18924 if (frame_pointer_needed)
18925 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18927 /* If we aren't loading the PIC register,
18928 don't stack it even though it may be live. */
18929 if (flag_pic
18930 && !TARGET_SINGLE_PIC_BASE
18931 && arm_pic_register != INVALID_REGNUM
18932 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18933 || crtl->uses_pic_offset_table))
18934 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18936 /* The prologue will copy SP into R0, so save it. */
18937 if (IS_STACKALIGN (func_type))
18938 save_reg_mask |= 1;
18941 /* Save registers so the exception handler can modify them. */
18942 if (crtl->calls_eh_return)
18944 unsigned int i;
18946 for (i = 0; ; i++)
18948 reg = EH_RETURN_DATA_REGNO (i);
18949 if (reg == INVALID_REGNUM)
18950 break;
18951 save_reg_mask |= 1 << reg;
18955 return save_reg_mask;
18958 /* Return true if r3 is live at the start of the function. */
18960 static bool
18961 arm_r3_live_at_start_p (void)
18963 /* Just look at cfg info, which is still close enough to correct at this
18964 point. This gives false positives for broken functions that might use
18965 uninitialized data that happens to be allocated in r3, but who cares? */
18966 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18969 /* Compute the number of bytes used to store the static chain register on the
18970 stack, above the stack frame. We need to know this accurately to get the
18971 alignment of the rest of the stack frame correct. */
18973 static int
18974 arm_compute_static_chain_stack_bytes (void)
18976 /* See the defining assertion in arm_expand_prologue. */
18977 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18978 && IS_NESTED (arm_current_func_type ())
18979 && arm_r3_live_at_start_p ()
18980 && crtl->args.pretend_args_size == 0)
18981 return 4;
18983 return 0;
18986 /* Compute a bit mask of which registers need to be
18987 saved on the stack for the current function.
18988 This is used by arm_get_frame_offsets, which may add extra registers. */
18990 static unsigned long
18991 arm_compute_save_reg_mask (void)
18993 unsigned int save_reg_mask = 0;
18994 unsigned long func_type = arm_current_func_type ();
18995 unsigned int reg;
18997 if (IS_NAKED (func_type))
18998 /* This should never really happen. */
18999 return 0;
19001 /* If we are creating a stack frame, then we must save the frame pointer,
19002 IP (which will hold the old stack pointer), LR and the PC. */
19003 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19004 save_reg_mask |=
19005 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19006 | (1 << IP_REGNUM)
19007 | (1 << LR_REGNUM)
19008 | (1 << PC_REGNUM);
19010 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19012 /* Decide if we need to save the link register.
19013 Interrupt routines have their own banked link register,
19014 so they never need to save it.
19015 Otherwise if we do not use the link register we do not need to save
19016 it. If we are pushing other registers onto the stack however, we
19017 can save an instruction in the epilogue by pushing the link register
19018 now and then popping it back into the PC. This incurs extra memory
19019 accesses though, so we only do it when optimizing for size, and only
19020 if we know that we will not need a fancy return sequence. */
19021 if (df_regs_ever_live_p (LR_REGNUM)
19022 || (save_reg_mask
19023 && optimize_size
19024 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19025 && !crtl->calls_eh_return))
19026 save_reg_mask |= 1 << LR_REGNUM;
19028 if (cfun->machine->lr_save_eliminated)
19029 save_reg_mask &= ~ (1 << LR_REGNUM);
19031 if (TARGET_REALLY_IWMMXT
19032 && ((bit_count (save_reg_mask)
19033 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19034 arm_compute_static_chain_stack_bytes())
19035 ) % 2) != 0)
19037 /* The total number of registers that are going to be pushed
19038 onto the stack is odd. We need to ensure that the stack
19039 is 64-bit aligned before we start to save iWMMXt registers,
19040 and also before we start to create locals. (A local variable
19041 might be a double or long long which we will load/store using
19042 an iWMMXt instruction). Therefore we need to push another
19043 ARM register, so that the stack will be 64-bit aligned. We
19044 try to avoid using the arg registers (r0 - r3) as they might be
19045 used to pass values in a tail call. */
19046 for (reg = 4; reg <= 12; reg++)
19047 if ((save_reg_mask & (1 << reg)) == 0)
19048 break;
19050 if (reg <= 12)
19051 save_reg_mask |= (1 << reg);
19052 else
19054 cfun->machine->sibcall_blocked = 1;
19055 save_reg_mask |= (1 << 3);
19059 /* We may need to push an additional register for use initializing the
19060 PIC base register. */
19061 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19062 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19064 reg = thumb_find_work_register (1 << 4);
19065 if (!call_used_regs[reg])
19066 save_reg_mask |= (1 << reg);
19069 return save_reg_mask;
19073 /* Compute a bit mask of which registers need to be
19074 saved on the stack for the current function. */
19075 static unsigned long
19076 thumb1_compute_save_reg_mask (void)
19078 unsigned long mask;
19079 unsigned reg;
19081 mask = 0;
19082 for (reg = 0; reg < 12; reg ++)
19083 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19084 mask |= 1 << reg;
19086 if (flag_pic
19087 && !TARGET_SINGLE_PIC_BASE
19088 && arm_pic_register != INVALID_REGNUM
19089 && crtl->uses_pic_offset_table)
19090 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19092 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19093 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19094 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19096 /* LR will also be pushed if any lo regs are pushed. */
19097 if (mask & 0xff || thumb_force_lr_save ())
19098 mask |= (1 << LR_REGNUM);
19100 /* Make sure we have a low work register if we need one.
19101 We will need one if we are going to push a high register,
19102 but we are not currently intending to push a low register. */
19103 if ((mask & 0xff) == 0
19104 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19106 /* Use thumb_find_work_register to choose which register
19107 we will use. If the register is live then we will
19108 have to push it. Use LAST_LO_REGNUM as our fallback
19109 choice for the register to select. */
19110 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19111 /* Make sure the register returned by thumb_find_work_register is
19112 not part of the return value. */
19113 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19114 reg = LAST_LO_REGNUM;
19116 if (! call_used_regs[reg])
19117 mask |= 1 << reg;
19120 /* The 504 below is 8 bytes less than 512 because there are two possible
19121 alignment words. We can't tell here if they will be present or not so we
19122 have to play it safe and assume that they are. */
19123 if ((CALLER_INTERWORKING_SLOT_SIZE +
19124 ROUND_UP_WORD (get_frame_size ()) +
19125 crtl->outgoing_args_size) >= 504)
19127 /* This is the same as the code in thumb1_expand_prologue() which
19128 determines which register to use for stack decrement. */
19129 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19130 if (mask & (1 << reg))
19131 break;
19133 if (reg > LAST_LO_REGNUM)
19135 /* Make sure we have a register available for stack decrement. */
19136 mask |= 1 << LAST_LO_REGNUM;
19140 return mask;
19144 /* Return the number of bytes required to save VFP registers. */
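/* Worked example (a sketch): if d8 and d9 are the only live callee-saved
   VFP registers, the loop below finds a block of two and, on a pre-ARMv6
   core, pads it to three for the ARM10 VFPr1 workaround, so the function
   returns 24 rather than 16.  */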
19145 static int
19146 arm_get_vfp_saved_size (void)
19148 unsigned int regno;
19149 int count;
19150 int saved;
19152 saved = 0;
19153 /* Space for saved VFP registers. */
19154 if (TARGET_HARD_FLOAT && TARGET_VFP)
19156 count = 0;
19157 for (regno = FIRST_VFP_REGNUM;
19158 regno < LAST_VFP_REGNUM;
19159 regno += 2)
19161 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19162 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19164 if (count > 0)
19166 /* Workaround ARM10 VFPr1 bug. */
19167 if (count == 2 && !arm_arch6)
19168 count++;
19169 saved += count * 8;
19171 count = 0;
19173 else
19174 count++;
19176 if (count > 0)
19178 if (count == 2 && !arm_arch6)
19179 count++;
19180 saved += count * 8;
19183 return saved;
19187 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19188 everything bar the final return instruction. If simple_return is true,
19189 then do not output the epilogue, because it has already been emitted in RTL. */
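/* A sketch of typical output (assumed frame layout): for a normal ARM
   function whose saved registers are {r4, lr}, the popping path below
   prints something like "ldmfd sp!, {r4, pc}" ("pop {r4, pc}" with
   unified assembly), folding the return into the restore; a function
   that saved nothing falls through to a plain "bx lr" or "mov pc, lr"
   depending on the architecture.  */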
19190 const char *
19191 output_return_instruction (rtx operand, bool really_return, bool reverse,
19192 bool simple_return)
19194 char conditional[10];
19195 char instr[100];
19196 unsigned reg;
19197 unsigned long live_regs_mask;
19198 unsigned long func_type;
19199 arm_stack_offsets *offsets;
19201 func_type = arm_current_func_type ();
19203 if (IS_NAKED (func_type))
19204 return "";
19206 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19208 /* If this function was declared non-returning, and we have
19209 found a tail call, then we have to trust that the called
19210 function won't return. */
19211 if (really_return)
19213 rtx ops[2];
19215 /* Otherwise, trap an attempted return by aborting. */
19216 ops[0] = operand;
19217 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19218 : "abort");
19219 assemble_external_libcall (ops[1]);
19220 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19223 return "";
19226 gcc_assert (!cfun->calls_alloca || really_return);
19228 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19230 cfun->machine->return_used_this_function = 1;
19232 offsets = arm_get_frame_offsets ();
19233 live_regs_mask = offsets->saved_regs_mask;
19235 if (!simple_return && live_regs_mask)
19237 const char * return_reg;
19239 /* If we do not have any special requirements for function exit
19240 (e.g. interworking) then we can load the return address
19241 directly into the PC. Otherwise we must load it into LR. */
19242 if (really_return
19243 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19244 return_reg = reg_names[PC_REGNUM];
19245 else
19246 return_reg = reg_names[LR_REGNUM];
19248 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19250 /* There are three possible reasons for the IP register
19251 being saved. 1) a stack frame was created, in which case
19252 IP contains the old stack pointer, or 2) an ISR routine
19253 corrupted it, or 3) it was saved to align the stack on
19254 iWMMXt. In case 1, restore IP into SP, otherwise just
19255 restore IP. */
19256 if (frame_pointer_needed)
19258 live_regs_mask &= ~ (1 << IP_REGNUM);
19259 live_regs_mask |= (1 << SP_REGNUM);
19261 else
19262 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19265 /* On some ARM architectures it is faster to use LDR rather than
19266 LDM to load a single register. On other architectures, the
19267 cost is the same. In 26 bit mode, or for exception handlers,
19268 we have to use LDM to load the PC so that the CPSR is also
19269 restored. */
19270 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19271 if (live_regs_mask == (1U << reg))
19272 break;
19274 if (reg <= LAST_ARM_REGNUM
19275 && (reg != LR_REGNUM
19276 || ! really_return
19277 || ! IS_INTERRUPT (func_type)))
19279 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19280 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19282 else
19284 char *p;
19285 int first = 1;
19287 /* Generate the load multiple instruction to restore the
19288 registers. Note we can get here, even if
19289 frame_pointer_needed is true, but only if sp already
19290 points to the base of the saved core registers. */
19291 if (live_regs_mask & (1 << SP_REGNUM))
19293 unsigned HOST_WIDE_INT stack_adjust;
19295 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19296 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19298 if (stack_adjust && arm_arch5 && TARGET_ARM)
19299 if (TARGET_UNIFIED_ASM)
19300 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19301 else
19302 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19303 else
19305 /* If we can't use ldmib (SA110 bug),
19306 then try to pop r3 instead. */
19307 if (stack_adjust)
19308 live_regs_mask |= 1 << 3;
19310 if (TARGET_UNIFIED_ASM)
19311 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19312 else
19313 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19316 else
19317 if (TARGET_UNIFIED_ASM)
19318 sprintf (instr, "pop%s\t{", conditional);
19319 else
19320 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19322 p = instr + strlen (instr);
19324 for (reg = 0; reg <= SP_REGNUM; reg++)
19325 if (live_regs_mask & (1 << reg))
19327 int l = strlen (reg_names[reg]);
19329 if (first)
19330 first = 0;
19331 else
19333 memcpy (p, ", ", 2);
19334 p += 2;
19337 memcpy (p, "%|", 2);
19338 memcpy (p + 2, reg_names[reg], l);
19339 p += l + 2;
19342 if (live_regs_mask & (1 << LR_REGNUM))
19344 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19345 /* If returning from an interrupt, restore the CPSR. */
19346 if (IS_INTERRUPT (func_type))
19347 strcat (p, "^");
19349 else
19350 strcpy (p, "}");
19353 output_asm_insn (instr, & operand);
19355 /* See if we need to generate an extra instruction to
19356 perform the actual function return. */
19357 if (really_return
19358 && func_type != ARM_FT_INTERWORKED
19359 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19361 /* The return has already been handled
19362 by loading the LR into the PC. */
19363 return "";
19367 if (really_return)
19369 switch ((int) ARM_FUNC_TYPE (func_type))
19371 case ARM_FT_ISR:
19372 case ARM_FT_FIQ:
19373 /* ??? This is wrong for unified assembly syntax. */
19374 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19375 break;
19377 case ARM_FT_INTERWORKED:
19378 sprintf (instr, "bx%s\t%%|lr", conditional);
19379 break;
19381 case ARM_FT_EXCEPTION:
19382 /* ??? This is wrong for unified assembly syntax. */
19383 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19384 break;
19386 default:
19387 /* Use bx if it's available. */
19388 if (arm_arch5 || arm_arch4t)
19389 sprintf (instr, "bx%s\t%%|lr", conditional);
19390 else
19391 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19392 break;
19395 output_asm_insn (instr, & operand);
19398 return "";
19401 /* Write the function name into the code section, directly preceding
19402 the function prologue.
19404 Code will be output similar to this:
19406 .ascii "arm_poke_function_name", 0
19407 .align
19409 .word 0xff000000 + (t1 - t0)
19410 arm_poke_function_name
19411 mov ip, sp
19412 stmfd sp!, {fp, ip, lr, pc}
19413 sub fp, ip, #4
19415 When performing a stack backtrace, code can inspect the value
19416 of 'pc' stored at 'fp' + 0. If the trace function then looks
19417 at location pc - 12 and the top 8 bits are set, then we know
19418 that there is a function name embedded immediately preceding this
19419 location, whose length is ((pc[-3]) & 0x00ffffff).
19421 We assume that pc is declared as a pointer to an unsigned long.
19423 It is of no benefit to output the function name if we are assembling
19424 a leaf function. These function types will not contain a stack
19425 backtrace structure, therefore it is not possible to determine the
19426 function name. */
19427 void
19428 arm_poke_function_name (FILE *stream, const char *name)
19430 unsigned long alignlength;
19431 unsigned long length;
19432 rtx x;
19434 length = strlen (name) + 1;
19435 alignlength = ROUND_UP_WORD (length);
19437 ASM_OUTPUT_ASCII (stream, name, length);
19438 ASM_OUTPUT_ALIGN (stream, 2);
19439 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19440 assemble_aligned_integer (UNITS_PER_WORD, x);
19443 /* Place some comments into the assembler stream
19444 describing the current function. */
19445 static void
19446 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19448 unsigned long func_type;
19450 /* ??? Do we want to print some of the below anyway? */
19451 if (TARGET_THUMB1)
19452 return;
19454 /* Sanity check. */
19455 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19457 func_type = arm_current_func_type ();
19459 switch ((int) ARM_FUNC_TYPE (func_type))
19461 default:
19462 case ARM_FT_NORMAL:
19463 break;
19464 case ARM_FT_INTERWORKED:
19465 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19466 break;
19467 case ARM_FT_ISR:
19468 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19469 break;
19470 case ARM_FT_FIQ:
19471 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19472 break;
19473 case ARM_FT_EXCEPTION:
19474 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19475 break;
19478 if (IS_NAKED (func_type))
19479 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19481 if (IS_VOLATILE (func_type))
19482 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19484 if (IS_NESTED (func_type))
19485 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19486 if (IS_STACKALIGN (func_type))
19487 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19489 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19490 crtl->args.size,
19491 crtl->args.pretend_args_size, frame_size);
19493 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19494 frame_pointer_needed,
19495 cfun->machine->uses_anonymous_args);
19497 if (cfun->machine->lr_save_eliminated)
19498 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19500 if (crtl->calls_eh_return)
19501 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19505 static void
19506 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19507 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19509 arm_stack_offsets *offsets;
19511 if (TARGET_THUMB1)
19513 int regno;
19515 /* Emit any call-via-reg trampolines that are needed for v4t support
19516 of call_reg and call_value_reg type insns. */
19517 for (regno = 0; regno < LR_REGNUM; regno++)
19519 rtx label = cfun->machine->call_via[regno];
19521 if (label != NULL)
19523 switch_to_section (function_section (current_function_decl));
19524 targetm.asm_out.internal_label (asm_out_file, "L",
19525 CODE_LABEL_NUMBER (label));
19526 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19530 /* ??? Probably not safe to set this here, since it assumes that a
19531 function will be emitted as assembly immediately after we generate
19532 RTL for it. This does not happen for inline functions. */
19533 cfun->machine->return_used_this_function = 0;
19535 else /* TARGET_32BIT */
19537 /* We need to take into account any stack-frame rounding. */
19538 offsets = arm_get_frame_offsets ();
19540 gcc_assert (!use_return_insn (FALSE, NULL)
19541 || (cfun->machine->return_used_this_function != 0)
19542 || offsets->saved_regs == offsets->outgoing_args
19543 || frame_pointer_needed);
19547 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19548 STR and STRD. If an even number of registers are being pushed, one
19549 or more STRD patterns are created for each register pair. If an
19550 odd number of registers are pushed, emit an initial STR followed by
19551 as many STRD instructions as are needed. This works best when the
19552 stack is initially 64-bit aligned (the normal case), since it
19553 ensures that each STRD is also 64-bit aligned. */
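/* As an illustrative sketch (not part of the original sources): for a
   saved_regs_mask of {r4, r5, r6} the emitted sequence is roughly

       str   r4, [sp, #-12]!
       strd  r5, r6, [sp, #4]

   while for {r4, r5, r6, r7} it is roughly

       strd  r4, r5, [sp, #-16]!
       strd  r6, r7, [sp, #8]

   with a single REG_FRAME_RELATED_EXPR note on the first insn describing
   the whole stack decrement and the individual stores.  */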
19554 static void
19555 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19557 int num_regs = 0;
19558 int i;
19559 int regno;
19560 rtx par = NULL_RTX;
19561 rtx dwarf = NULL_RTX;
19562 rtx tmp;
19563 bool first = true;
19565 num_regs = bit_count (saved_regs_mask);
19567 /* Must be at least one register to save, and can't save SP or PC. */
19568 gcc_assert (num_regs > 0 && num_regs <= 14);
19569 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19570 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19572 /* Create sequence for DWARF info. All the frame-related data for
19573 debugging is held in this wrapper. */
19574 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19576 /* Describe the stack adjustment. */
19577 tmp = gen_rtx_SET (VOIDmode,
19578 stack_pointer_rtx,
19579 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19580 RTX_FRAME_RELATED_P (tmp) = 1;
19581 XVECEXP (dwarf, 0, 0) = tmp;
19583 /* Find the first register. */
19584 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19587 i = 0;
19589 /* If there's an odd number of registers to push, start off by
19590 pushing a single register. This ensures that subsequent strd
19591 operations are dword aligned (assuming that SP was originally
19592 64-bit aligned). */
19593 if ((num_regs & 1) != 0)
19595 rtx reg, mem, insn;
19597 reg = gen_rtx_REG (SImode, regno);
19598 if (num_regs == 1)
19599 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19600 stack_pointer_rtx));
19601 else
19602 mem = gen_frame_mem (Pmode,
19603 gen_rtx_PRE_MODIFY
19604 (Pmode, stack_pointer_rtx,
19605 plus_constant (Pmode, stack_pointer_rtx,
19606 -4 * num_regs)));
19608 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19609 RTX_FRAME_RELATED_P (tmp) = 1;
19610 insn = emit_insn (tmp);
19611 RTX_FRAME_RELATED_P (insn) = 1;
19612 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19613 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19614 reg);
19615 RTX_FRAME_RELATED_P (tmp) = 1;
19616 i++;
19617 regno++;
19618 XVECEXP (dwarf, 0, i) = tmp;
19619 first = false;
19622 while (i < num_regs)
19623 if (saved_regs_mask & (1 << regno))
19625 rtx reg1, reg2, mem1, mem2;
19626 rtx tmp0, tmp1, tmp2;
19627 int regno2;
19629 /* Find the register to pair with this one. */
19630 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19631 regno2++)
19634 reg1 = gen_rtx_REG (SImode, regno);
19635 reg2 = gen_rtx_REG (SImode, regno2);
19637 if (first)
19639 rtx insn;
19641 first = false;
19642 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19643 stack_pointer_rtx,
19644 -4 * num_regs));
19645 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19646 stack_pointer_rtx,
19647 -4 * (num_regs - 1)));
19648 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19649 plus_constant (Pmode, stack_pointer_rtx,
19650 -4 * (num_regs)));
19651 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19652 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19653 RTX_FRAME_RELATED_P (tmp0) = 1;
19654 RTX_FRAME_RELATED_P (tmp1) = 1;
19655 RTX_FRAME_RELATED_P (tmp2) = 1;
19656 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19657 XVECEXP (par, 0, 0) = tmp0;
19658 XVECEXP (par, 0, 1) = tmp1;
19659 XVECEXP (par, 0, 2) = tmp2;
19660 insn = emit_insn (par);
19661 RTX_FRAME_RELATED_P (insn) = 1;
19662 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19664 else
19666 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19667 stack_pointer_rtx,
19668 4 * i));
19669 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19670 stack_pointer_rtx,
19671 4 * (i + 1)));
19672 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19673 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19674 RTX_FRAME_RELATED_P (tmp1) = 1;
19675 RTX_FRAME_RELATED_P (tmp2) = 1;
19676 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19677 XVECEXP (par, 0, 0) = tmp1;
19678 XVECEXP (par, 0, 1) = tmp2;
19679 emit_insn (par);
19682 /* Create unwind information. This is an approximation. */
19683 tmp1 = gen_rtx_SET (VOIDmode,
19684 gen_frame_mem (Pmode,
19685 plus_constant (Pmode,
19686 stack_pointer_rtx,
19687 4 * i)),
19688 reg1);
19689 tmp2 = gen_rtx_SET (VOIDmode,
19690 gen_frame_mem (Pmode,
19691 plus_constant (Pmode,
19692 stack_pointer_rtx,
19693 4 * (i + 1))),
19694 reg2);
19696 RTX_FRAME_RELATED_P (tmp1) = 1;
19697 RTX_FRAME_RELATED_P (tmp2) = 1;
19698 XVECEXP (dwarf, 0, i + 1) = tmp1;
19699 XVECEXP (dwarf, 0, i + 2) = tmp2;
19700 i += 2;
19701 regno = regno2 + 1;
19703 else
19704 regno++;
19706 return;
19709 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19710 whenever possible, otherwise it emits single-word stores. The first store
19711 also allocates stack space for all saved registers, using writeback with
19712 post-addressing mode. All other stores use offset addressing. If no STRD
19713 can be emitted, this function emits a sequence of single-word stores,
19714 and not an STM as before, because single-word stores provide more
19715 scheduling freedom and can be turned into an STM by peephole optimizations. */
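/* A hedged example (illustration only): for a saved_regs_mask of
   {r4, r5, r6, r7} this would emit approximately

       strd  r4, r5, [sp, #-16]!   @ first store allocates all the space
       strd  r6, r7, [sp, #8]

   and for {r4, r5, r6} approximately

       strd  r4, r5, [sp, #-12]!
       str   r6, [sp, #8]

   with the DWARF frame note attached to the first store.  */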
19716 static void
19717 arm_emit_strd_push (unsigned long saved_regs_mask)
19719 int num_regs = 0;
19720 int i, j, dwarf_index = 0;
19721 int offset = 0;
19722 rtx dwarf = NULL_RTX;
19723 rtx insn = NULL_RTX;
19724 rtx tmp, mem;
19726 /* TODO: More efficient code can be emitted by changing the
19727 layout, e.g., first push all pairs that can use STRD to keep the
19728 stack aligned, and then push all other registers. */
19729 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19730 if (saved_regs_mask & (1 << i))
19731 num_regs++;
19733 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19734 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19735 gcc_assert (num_regs > 0);
19737 /* Create sequence for DWARF info. */
19738 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19740 /* For dwarf info, we generate explicit stack update. */
19741 tmp = gen_rtx_SET (VOIDmode,
19742 stack_pointer_rtx,
19743 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19744 RTX_FRAME_RELATED_P (tmp) = 1;
19745 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19747 /* Save registers. */
19748 offset = - 4 * num_regs;
19749 j = 0;
19750 while (j <= LAST_ARM_REGNUM)
19751 if (saved_regs_mask & (1 << j))
19753 if ((j % 2 == 0)
19754 && (saved_regs_mask & (1 << (j + 1))))
19756 /* Current register and next register form a register pair for
19757 which STRD can be generated. */
19758 if (offset < 0)
19760 /* Allocate stack space for all saved registers. */
19761 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19762 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19763 mem = gen_frame_mem (DImode, tmp);
19764 offset = 0;
19766 else if (offset > 0)
19767 mem = gen_frame_mem (DImode,
19768 plus_constant (Pmode,
19769 stack_pointer_rtx,
19770 offset));
19771 else
19772 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19774 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19775 RTX_FRAME_RELATED_P (tmp) = 1;
19776 tmp = emit_insn (tmp);
19778 /* Record the first store insn. */
19779 if (dwarf_index == 1)
19780 insn = tmp;
19782 /* Generate dwarf info. */
19783 mem = gen_frame_mem (SImode,
19784 plus_constant (Pmode,
19785 stack_pointer_rtx,
19786 offset));
19787 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19788 RTX_FRAME_RELATED_P (tmp) = 1;
19789 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19791 mem = gen_frame_mem (SImode,
19792 plus_constant (Pmode,
19793 stack_pointer_rtx,
19794 offset + 4));
19795 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19796 RTX_FRAME_RELATED_P (tmp) = 1;
19797 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19799 offset += 8;
19800 j += 2;
19802 else
19804 /* Emit a single word store. */
19805 if (offset < 0)
19807 /* Allocate stack space for all saved registers. */
19808 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19809 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19810 mem = gen_frame_mem (SImode, tmp);
19811 offset = 0;
19813 else if (offset > 0)
19814 mem = gen_frame_mem (SImode,
19815 plus_constant (Pmode,
19816 stack_pointer_rtx,
19817 offset));
19818 else
19819 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19821 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19822 RTX_FRAME_RELATED_P (tmp) = 1;
19823 tmp = emit_insn (tmp);
19825 /* Record the first store insn. */
19826 if (dwarf_index == 1)
19827 insn = tmp;
19829 /* Generate dwarf info. */
19830 mem = gen_frame_mem (SImode,
19831 plus_constant (Pmode,
19832 stack_pointer_rtx,
19833 offset));
19834 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19835 RTX_FRAME_RELATED_P (tmp) = 1;
19836 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19838 offset += 4;
19839 j += 1;
19842 else
19843 j++;
19845 /* Attach dwarf info to the first insn we generate. */
19846 gcc_assert (insn != NULL_RTX);
19847 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19848 RTX_FRAME_RELATED_P (insn) = 1;
19851 /* Generate and emit an insn that we will recognize as a push_multi.
19852 Unfortunately, since this insn does not reflect very well the actual
19853 semantics of the operation, we need to annotate the insn for the benefit
19854 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19855 MASK for registers that should be annotated for DWARF2 frame unwind
19856 information. */
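/* For example (illustrative, not from the sources), a call such as
   emit_multi_reg_push (mask, mask) with mask = {r4, r5, lr} emits a
   single "push {r4, r5, lr}" (stmfd sp!, {r4, r5, lr}); the attached
   REG_FRAME_RELATED_EXPR note describes the equivalent sp = sp - 12
   followed by the three individual stores.  */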
19857 static rtx
19858 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19860 int num_regs = 0;
19861 int num_dwarf_regs = 0;
19862 int i, j;
19863 rtx par;
19864 rtx dwarf;
19865 int dwarf_par_index;
19866 rtx tmp, reg;
19868 /* We don't record the PC in the dwarf frame information. */
19869 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19871 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19873 if (mask & (1 << i))
19874 num_regs++;
19875 if (dwarf_regs_mask & (1 << i))
19876 num_dwarf_regs++;
19879 gcc_assert (num_regs && num_regs <= 16);
19880 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19882 /* For the body of the insn we are going to generate an UNSPEC in
19883 parallel with several USEs. This allows the insn to be recognized
19884 by the push_multi pattern in the arm.md file.
19886 The body of the insn looks something like this:
19888 (parallel [
19889 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19890 (const_int:SI <num>)))
19891 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19892 (use (reg:SI XX))
19893 (use (reg:SI YY))
19897 For the frame note however, we try to be more explicit and actually
19898 show each register being stored into the stack frame, plus a (single)
19899 decrement of the stack pointer. We do it this way in order to be
19900 friendly to the stack unwinding code, which only wants to see a single
19901 stack decrement per instruction. The RTL we generate for the note looks
19902 something like this:
19904 (sequence [
19905 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19906 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19907 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19908 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19912 FIXME: In an ideal world the PRE_MODIFY would not exist and
19913 instead we'd have a parallel expression detailing all
19914 the stores to the various memory addresses so that debug
19915 information is more up-to-date. Remember however while writing
19916 this to take care of the constraints with the push instruction.
19918 Note also that this has to be taken care of for the VFP registers.
19920 For more see PR43399. */
19922 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19923 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19924 dwarf_par_index = 1;
19926 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19928 if (mask & (1 << i))
19930 reg = gen_rtx_REG (SImode, i);
19932 XVECEXP (par, 0, 0)
19933 = gen_rtx_SET (VOIDmode,
19934 gen_frame_mem
19935 (BLKmode,
19936 gen_rtx_PRE_MODIFY (Pmode,
19937 stack_pointer_rtx,
19938 plus_constant
19939 (Pmode, stack_pointer_rtx,
19940 -4 * num_regs))
19942 gen_rtx_UNSPEC (BLKmode,
19943 gen_rtvec (1, reg),
19944 UNSPEC_PUSH_MULT));
19946 if (dwarf_regs_mask & (1 << i))
19948 tmp = gen_rtx_SET (VOIDmode,
19949 gen_frame_mem (SImode, stack_pointer_rtx),
19950 reg);
19951 RTX_FRAME_RELATED_P (tmp) = 1;
19952 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19955 break;
19959 for (j = 1, i++; j < num_regs; i++)
19961 if (mask & (1 << i))
19963 reg = gen_rtx_REG (SImode, i);
19965 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19967 if (dwarf_regs_mask & (1 << i))
19970 = gen_rtx_SET (VOIDmode,
19971 gen_frame_mem
19972 (SImode,
19973 plus_constant (Pmode, stack_pointer_rtx,
19974 4 * j)),
19975 reg);
19976 RTX_FRAME_RELATED_P (tmp) = 1;
19977 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19980 j++;
19984 par = emit_insn (par);
19986 tmp = gen_rtx_SET (VOIDmode,
19987 stack_pointer_rtx,
19988 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 XVECEXP (dwarf, 0, 0) = tmp;
19992 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19994 return par;
19997 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19998 SIZE is the offset to be adjusted.
19999 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
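/* For instance, arm_emit_multi_reg_pop below calls
   arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
   stack_pointer_rtx, stack_pointer_rtx), which attaches a note
   equivalent to (set sp (plus sp (const_int 4*num_regs))) so the
   unwinder sees the CFA adjustment made by the pop.  */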
20000 static void
20001 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20003 rtx dwarf;
20005 RTX_FRAME_RELATED_P (insn) = 1;
20006 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20007 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20010 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20011 SAVED_REGS_MASK shows which registers need to be restored.
20013 Unfortunately, since this insn does not reflect very well the actual
20014 semantics of the operation, we need to annotate the insn for the benefit
20015 of DWARF2 frame unwind information. */
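/* Illustrative sketch: for a saved_regs_mask of {r4, r5, pc} this emits
   "pop {r4, r5, pc}" (ldmfd sp!, {r4, r5, pc}) as a jump insn; r4 and r5
   get REG_CFA_RESTORE notes, while PC is deliberately left out of the
   DWARF information.  */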
20016 static void
20017 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20019 int num_regs = 0;
20020 int i, j;
20021 rtx par;
20022 rtx dwarf = NULL_RTX;
20023 rtx tmp, reg;
20024 bool return_in_pc;
20025 int offset_adj;
20026 int emit_update;
20028 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20029 offset_adj = return_in_pc ? 1 : 0;
20030 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20031 if (saved_regs_mask & (1 << i))
20032 num_regs++;
20034 gcc_assert (num_regs && num_regs <= 16);
20036 /* If SP is in reglist, then we don't emit SP update insn. */
20037 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20039 /* The parallel needs to hold num_regs SETs
20040 and one SET for the stack update. */
20041 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20043 if (return_in_pc)
20045 tmp = ret_rtx;
20046 XVECEXP (par, 0, 0) = tmp;
20049 if (emit_update)
20051 /* Increment the stack pointer, based on there being
20052 num_regs 4-byte registers to restore. */
20053 tmp = gen_rtx_SET (VOIDmode,
20054 stack_pointer_rtx,
20055 plus_constant (Pmode,
20056 stack_pointer_rtx,
20057 4 * num_regs));
20058 RTX_FRAME_RELATED_P (tmp) = 1;
20059 XVECEXP (par, 0, offset_adj) = tmp;
20062 /* Now restore every reg, which may include PC. */
20063 for (j = 0, i = 0; j < num_regs; i++)
20064 if (saved_regs_mask & (1 << i))
20066 reg = gen_rtx_REG (SImode, i);
20067 if ((num_regs == 1) && emit_update && !return_in_pc)
20069 /* Emit single load with writeback. */
20070 tmp = gen_frame_mem (SImode,
20071 gen_rtx_POST_INC (Pmode,
20072 stack_pointer_rtx));
20073 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20074 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20075 return;
20078 tmp = gen_rtx_SET (VOIDmode,
20079 reg,
20080 gen_frame_mem
20081 (SImode,
20082 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20083 RTX_FRAME_RELATED_P (tmp) = 1;
20084 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20086 /* We need to maintain a sequence for DWARF info too. Since the DWARF
20087 info should not include PC, skip PC. */
20088 if (i != PC_REGNUM)
20089 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20091 j++;
20094 if (return_in_pc)
20095 par = emit_jump_insn (par);
20096 else
20097 par = emit_insn (par);
20099 REG_NOTES (par) = dwarf;
20100 if (!return_in_pc)
20101 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20102 stack_pointer_rtx, stack_pointer_rtx);
20105 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20106 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20108 Unfortunately, since this insn does not reflect very well the actual
20109 semantics of the operation, we need to annotate the insn for the benefit
20110 of DWARF2 frame unwind information. */
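/* Illustrative sketch: popping four D registers starting at d8 emits
   roughly "vldm sp!, {d8-d11}", with a REG_CFA_RESTORE note for each
   D register and a CFA adjustment of 4 * 8 = 32 bytes (or, when the
   base register is IP, a REG_CFA_DEF_CFA note switching the CFA back
   to the hard frame pointer).  */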
20111 static void
20112 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20114 int i, j;
20115 rtx par;
20116 rtx dwarf = NULL_RTX;
20117 rtx tmp, reg;
20119 gcc_assert (num_regs && num_regs <= 32);
20121 /* Workaround ARM10 VFPr1 bug. */
20122 if (num_regs == 2 && !arm_arch6)
20124 if (first_reg == 15)
20125 first_reg--;
20127 num_regs++;
20130 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20131 there could be up to 32 D-registers to restore.
20132 If there are more than 16 D-registers, make two recursive calls,
20133 each of which emits one pop_multi instruction. */
20134 if (num_regs > 16)
20136 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20137 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20138 return;
20141 /* The parallel needs to hold num_regs SETs
20142 and one SET for the stack update. */
20143 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20145 /* Increment the stack pointer, based on there being
20146 num_regs 8-byte registers to restore. */
20147 tmp = gen_rtx_SET (VOIDmode,
20148 base_reg,
20149 plus_constant (Pmode, base_reg, 8 * num_regs));
20150 RTX_FRAME_RELATED_P (tmp) = 1;
20151 XVECEXP (par, 0, 0) = tmp;
20153 /* Now show every reg that will be restored, using a SET for each. */
20154 for (j = 0, i=first_reg; j < num_regs; i += 2)
20156 reg = gen_rtx_REG (DFmode, i);
20158 tmp = gen_rtx_SET (VOIDmode,
20159 reg,
20160 gen_frame_mem
20161 (DFmode,
20162 plus_constant (Pmode, base_reg, 8 * j)));
20163 RTX_FRAME_RELATED_P (tmp) = 1;
20164 XVECEXP (par, 0, j + 1) = tmp;
20166 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20168 j++;
20171 par = emit_insn (par);
20172 REG_NOTES (par) = dwarf;
20174 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20175 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20177 RTX_FRAME_RELATED_P (par) = 1;
20178 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20180 else
20181 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20182 base_reg, base_reg);
20185 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20186 an even number of registers is being popped, multiple LDRD patterns are
20187 created for all register pairs. If an odd number of registers is popped,
20188 the last register is loaded using an LDR pattern. */
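/* Rough illustration: for a saved_regs_mask of {r4, r5, r6, r7} the
   emitted sequence is approximately

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       add   sp, sp, #16

   and for {r4, r5, pc} it is approximately

       ldrd  r4, r5, [sp]
       add   sp, sp, #8
       ldr   pc, [sp], #4

   where the final load doubles as the function return.  */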
20189 static void
20190 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20192 int num_regs = 0;
20193 int i, j;
20194 rtx par = NULL_RTX;
20195 rtx dwarf = NULL_RTX;
20196 rtx tmp, reg, tmp1;
20197 bool return_in_pc;
20199 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20200 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20201 if (saved_regs_mask & (1 << i))
20202 num_regs++;
20204 gcc_assert (num_regs && num_regs <= 16);
20206 /* We cannot generate ldrd for PC, so reduce the count if PC is
20207 to be popped. Thus, if num_regs was even it now becomes odd,
20208 and we can generate the pop with PC; if num_regs was odd it becomes
20209 even, and an ldr with return can be generated for PC. */
20210 if (return_in_pc)
20211 num_regs--;
20213 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20215 /* Var j iterates over all the registers set in saved_regs_mask, while
20216 var i gives the index of each saved register in the stack frame.
20217 A PARALLEL RTX describing a register pair is created here, so that the
20218 pattern for LDRD can be matched. As PC is always the last register to
20219 be popped, and we have already decremented num_regs if PC is present,
20220 we don't have to worry about PC in this loop. */
20221 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20222 if (saved_regs_mask & (1 << j))
20224 /* Create RTX for memory load. */
20225 reg = gen_rtx_REG (SImode, j);
20226 tmp = gen_rtx_SET (SImode,
20227 reg,
20228 gen_frame_mem (SImode,
20229 plus_constant (Pmode,
20230 stack_pointer_rtx, 4 * i)));
20231 RTX_FRAME_RELATED_P (tmp) = 1;
20233 if (i % 2 == 0)
20235 /* When saved-register index (i) is even, the RTX to be emitted is
20236 yet to be created. Hence create it first. The LDRD pattern we
20237 are generating is :
20238 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20239 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20240 where target registers need not be consecutive. */
20241 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20242 dwarf = NULL_RTX;
20245 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20246 added as 0th element and if i is odd, reg_i is added as 1st element
20247 of LDRD pattern shown above. */
20248 XVECEXP (par, 0, (i % 2)) = tmp;
20249 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20251 if ((i % 2) == 1)
20253 /* When saved-register index (i) is odd, RTXs for both the registers
20254 to be loaded are generated in above given LDRD pattern, and the
20255 pattern can be emitted now. */
20256 par = emit_insn (par);
20257 REG_NOTES (par) = dwarf;
20258 RTX_FRAME_RELATED_P (par) = 1;
20261 i++;
20264 /* If the number of registers popped is odd AND return_in_pc is false, OR
20265 the number of registers is even AND return_in_pc is true, the last register
20266 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20267 then use LDR with post increment. */
20269 /* Increment the stack pointer, based on there being
20270 num_regs 4-byte registers to restore. */
20271 tmp = gen_rtx_SET (VOIDmode,
20272 stack_pointer_rtx,
20273 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20274 RTX_FRAME_RELATED_P (tmp) = 1;
20275 tmp = emit_insn (tmp);
20276 if (!return_in_pc)
20278 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20279 stack_pointer_rtx, stack_pointer_rtx);
20282 dwarf = NULL_RTX;
20284 if (((num_regs % 2) == 1 && !return_in_pc)
20285 || ((num_regs % 2) == 0 && return_in_pc))
20287 /* Scan for the single register to be popped. Skip until the saved
20288 register is found. */
20289 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20291 /* Gen LDR with post increment here. */
20292 tmp1 = gen_rtx_MEM (SImode,
20293 gen_rtx_POST_INC (SImode,
20294 stack_pointer_rtx));
20295 set_mem_alias_set (tmp1, get_frame_alias_set ());
20297 reg = gen_rtx_REG (SImode, j);
20298 tmp = gen_rtx_SET (SImode, reg, tmp1);
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20302 if (return_in_pc)
20304 /* If return_in_pc, j must be PC_REGNUM. */
20305 gcc_assert (j == PC_REGNUM);
20306 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20307 XVECEXP (par, 0, 0) = ret_rtx;
20308 XVECEXP (par, 0, 1) = tmp;
20309 par = emit_jump_insn (par);
20311 else
20313 par = emit_insn (tmp);
20314 REG_NOTES (par) = dwarf;
20315 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20316 stack_pointer_rtx, stack_pointer_rtx);
20320 else if ((num_regs % 2) == 1 && return_in_pc)
20322 /* There are 2 registers to be popped. So, generate the pattern
20323 pop_multiple_with_stack_update_and_return to pop in PC. */
20324 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20327 return;
20330 /* LDRD in ARM mode needs consecutive registers as operands. This function
20331 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20332 offset addressing and then generates one separate stack update. This provides
20333 more scheduling freedom, compared to writeback on every load. However,
20334 if the function returns using load into PC directly
20335 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20336 before the last load. TODO: Add a peephole optimization to recognize
20337 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20338 peephole optimization to merge the load at stack-offset zero
20339 with the stack update instruction using load with writeback
20340 in post-index addressing mode. */
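/* Rough illustration: for a saved_regs_mask of {r4, r5, r6, r7, pc} in
   ARM mode this emits approximately

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       add   sp, sp, #16
       ldr   pc, [sp], #4

   where the final load performs the return.  */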
20341 static void
20342 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20344 int j = 0;
20345 int offset = 0;
20346 rtx par = NULL_RTX;
20347 rtx dwarf = NULL_RTX;
20348 rtx tmp, mem;
20350 /* Restore saved registers. */
20351 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20352 j = 0;
20353 while (j <= LAST_ARM_REGNUM)
20354 if (saved_regs_mask & (1 << j))
20356 if ((j % 2) == 0
20357 && (saved_regs_mask & (1 << (j + 1)))
20358 && (j + 1) != PC_REGNUM)
20360 /* Current register and next register form register pair for which
20361 LDRD can be generated. PC is always the last register popped, and
20362 we handle it separately. */
20363 if (offset > 0)
20364 mem = gen_frame_mem (DImode,
20365 plus_constant (Pmode,
20366 stack_pointer_rtx,
20367 offset));
20368 else
20369 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20371 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20372 tmp = emit_insn (tmp);
20373 RTX_FRAME_RELATED_P (tmp) = 1;
20375 /* Generate dwarf info. */
20377 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20378 gen_rtx_REG (SImode, j),
20379 NULL_RTX);
20380 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20381 gen_rtx_REG (SImode, j + 1),
20382 dwarf);
20384 REG_NOTES (tmp) = dwarf;
20386 offset += 8;
20387 j += 2;
20389 else if (j != PC_REGNUM)
20391 /* Emit a single word load. */
20392 if (offset > 0)
20393 mem = gen_frame_mem (SImode,
20394 plus_constant (Pmode,
20395 stack_pointer_rtx,
20396 offset));
20397 else
20398 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20400 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20401 tmp = emit_insn (tmp);
20402 RTX_FRAME_RELATED_P (tmp) = 1;
20404 /* Generate dwarf info. */
20405 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20406 gen_rtx_REG (SImode, j),
20407 NULL_RTX);
20409 offset += 4;
20410 j += 1;
20412 else /* j == PC_REGNUM */
20413 j++;
20415 else
20416 j++;
20418 /* Update the stack. */
20419 if (offset > 0)
20421 tmp = gen_rtx_SET (Pmode,
20422 stack_pointer_rtx,
20423 plus_constant (Pmode,
20424 stack_pointer_rtx,
20425 offset));
20426 tmp = emit_insn (tmp);
20427 arm_add_cfa_adjust_cfa_note (tmp, offset,
20428 stack_pointer_rtx, stack_pointer_rtx);
20429 offset = 0;
20432 if (saved_regs_mask & (1 << PC_REGNUM))
20434 /* Only PC is to be popped. */
20435 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20436 XVECEXP (par, 0, 0) = ret_rtx;
20437 tmp = gen_rtx_SET (SImode,
20438 gen_rtx_REG (SImode, PC_REGNUM),
20439 gen_frame_mem (SImode,
20440 gen_rtx_POST_INC (SImode,
20441 stack_pointer_rtx)));
20442 RTX_FRAME_RELATED_P (tmp) = 1;
20443 XVECEXP (par, 0, 1) = tmp;
20444 par = emit_jump_insn (par);
20446 /* Generate dwarf info. */
20447 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20448 gen_rtx_REG (SImode, PC_REGNUM),
20449 NULL_RTX);
20450 REG_NOTES (par) = dwarf;
20451 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20452 stack_pointer_rtx, stack_pointer_rtx);
20456 /* Calculate the size of the return value that is passed in registers. */
20457 static unsigned
20458 arm_size_return_regs (void)
20460 enum machine_mode mode;
20462 if (crtl->return_rtx != 0)
20463 mode = GET_MODE (crtl->return_rtx);
20464 else
20465 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20467 return GET_MODE_SIZE (mode);
20470 /* Return true if the current function needs to save/restore LR. */
20471 static bool
20472 thumb_force_lr_save (void)
20474 return !cfun->machine->lr_save_eliminated
20475 && (!leaf_function_p ()
20476 || thumb_far_jump_used_p ()
20477 || df_regs_ever_live_p (LR_REGNUM));
20480 /* We do not know whether r3 will be available, because
20481 an indirect tail call is happening in this
20482 particular case. */
20483 static bool
20484 is_indirect_tailcall_p (rtx call)
20486 rtx pat = PATTERN (call);
20488 /* Indirect tail call. */
20489 pat = XVECEXP (pat, 0, 0);
20490 if (GET_CODE (pat) == SET)
20491 pat = SET_SRC (pat);
20493 pat = XEXP (XEXP (pat, 0), 0);
20494 return REG_P (pat);
20497 /* Return true if r3 is used by any of the tail call insns in the
20498 current function. */
20499 static bool
20500 any_sibcall_could_use_r3 (void)
20502 edge_iterator ei;
20503 edge e;
20505 if (!crtl->tail_call_emit)
20506 return false;
20507 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20508 if (e->flags & EDGE_SIBCALL)
20510 rtx call = BB_END (e->src);
20511 if (!CALL_P (call))
20512 call = prev_nonnote_nondebug_insn (call);
20513 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20514 if (find_regno_fusage (call, USE, 3)
20515 || is_indirect_tailcall_p (call))
20516 return true;
20518 return false;
20522 /* Compute the distance from register FROM to register TO.
20523 These can be the arg pointer (26), the soft frame pointer (25),
20524 the stack pointer (13) or the hard frame pointer (11).
20525 In thumb mode r7 is used as the soft frame pointer, if needed.
20526 Typical stack layout looks like this:
20528 old stack pointer -> | |
20529 ----
20530 | | \
20531 | | saved arguments for
20532 | | vararg functions
20533 | | /
20535 hard FP & arg pointer -> | | \
20536 | | stack
20537 | | frame
20538 | | /
20540 | | \
20541 | | call saved
20542 | | registers
20543 soft frame pointer -> | | /
20545 | | \
20546 | | local
20547 | | variables
20548 locals base pointer -> | | /
20550 | | \
20551 | | outgoing
20552 | | arguments
20553 current stack pointer -> | | /
20556 For a given function some or all of these stack components
20557 may not be needed, giving rise to the possibility of
20558 eliminating some of the registers.
20560 The values returned by this function must reflect the behavior
20561 of arm_expand_prologue() and arm_compute_save_reg_mask().
20563 The sign of the number returned reflects the direction of stack
20564 growth, so the values are positive for all eliminations except
20565 from the soft frame pointer to the hard frame pointer.
20567 SFP may point just inside the local variables block to ensure correct
20568 alignment. */
20571 /* Calculate stack offsets. These are used to calculate register elimination
20572 offsets and in prologue/epilogue code. Also calculates which registers
20573 should be saved. */
20575 static arm_stack_offsets *
20576 arm_get_frame_offsets (void)
20578 struct arm_stack_offsets *offsets;
20579 unsigned long func_type;
20580 int leaf;
20581 int saved;
20582 int core_saved;
20583 HOST_WIDE_INT frame_size;
20584 int i;
20586 offsets = &cfun->machine->stack_offsets;
20588 /* We need to know if we are a leaf function. Unfortunately, it
20589 is possible to be called after start_sequence has been called,
20590 which causes get_insns to return the insns for the sequence,
20591 not the function, which will cause leaf_function_p to return
20592 the incorrect result.
20594 However, we only need to know about leaf functions once reload has
20595 completed, and the frame size cannot be changed after that time, so
20596 we can safely use the cached value. */
20598 if (reload_completed)
20599 return offsets;
20601 /* Initially this is the size of the local variables. It will be translated
20602 into an offset once we have determined the size of preceding data. */
20603 frame_size = ROUND_UP_WORD (get_frame_size ());
20605 leaf = leaf_function_p ();
20607 /* Space for variadic functions. */
20608 offsets->saved_args = crtl->args.pretend_args_size;
20610 /* In Thumb mode this is incorrect, but never used. */
20611 offsets->frame
20612 = (offsets->saved_args
20613 + arm_compute_static_chain_stack_bytes ()
20614 + (frame_pointer_needed ? 4 : 0));
20616 if (TARGET_32BIT)
20618 unsigned int regno;
20620 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20621 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20622 saved = core_saved;
20624 /* We know that SP will be doubleword aligned on entry, and we must
20625 preserve that condition at any subroutine call. We also require the
20626 soft frame pointer to be doubleword aligned. */
20628 if (TARGET_REALLY_IWMMXT)
20630 /* Check for the call-saved iWMMXt registers. */
20631 for (regno = FIRST_IWMMXT_REGNUM;
20632 regno <= LAST_IWMMXT_REGNUM;
20633 regno++)
20634 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20635 saved += 8;
20638 func_type = arm_current_func_type ();
20639 /* Space for saved VFP registers. */
20640 if (! IS_VOLATILE (func_type)
20641 && TARGET_HARD_FLOAT && TARGET_VFP)
20642 saved += arm_get_vfp_saved_size ();
20644 else /* TARGET_THUMB1 */
20646 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20647 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20648 saved = core_saved;
20649 if (TARGET_BACKTRACE)
20650 saved += 16;
20653 /* Saved registers include the stack frame. */
20654 offsets->saved_regs
20655 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20656 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20658 /* A leaf function does not need any stack alignment if it has nothing
20659 on the stack. */
20660 if (leaf && frame_size == 0
20661 /* However if it calls alloca(), we have a dynamically allocated
20662 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20663 && ! cfun->calls_alloca)
20665 offsets->outgoing_args = offsets->soft_frame;
20666 offsets->locals_base = offsets->soft_frame;
20667 return offsets;
20670 /* Ensure SFP has the correct alignment. */
20671 if (ARM_DOUBLEWORD_ALIGN
20672 && (offsets->soft_frame & 7))
20674 offsets->soft_frame += 4;
20675 /* Try to align stack by pushing an extra reg. Don't bother doing this
20676 when there is a stack frame as the alignment will be rolled into
20677 the normal stack adjustment. */
20678 if (frame_size + crtl->outgoing_args_size == 0)
20680 int reg = -1;
20682 /* If it is safe to use r3, then do so. This sometimes
20683 generates better code on Thumb-2 by avoiding the need to
20684 use 32-bit push/pop instructions. */
20685 if (! any_sibcall_could_use_r3 ()
20686 && arm_size_return_regs () <= 12
20687 && (offsets->saved_regs_mask & (1 << 3)) == 0
20688 && (TARGET_THUMB2
20689 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20691 reg = 3;
20693 else
20694 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20696 /* Avoid fixed registers; they may be changed at
20697 arbitrary times so it's unsafe to restore them
20698 during the epilogue. */
20699 if (!fixed_regs[i]
20700 && (offsets->saved_regs_mask & (1 << i)) == 0)
20702 reg = i;
20703 break;
20707 if (reg != -1)
20709 offsets->saved_regs += 4;
20710 offsets->saved_regs_mask |= (1 << reg);
20715 offsets->locals_base = offsets->soft_frame + frame_size;
20716 offsets->outgoing_args = (offsets->locals_base
20717 + crtl->outgoing_args_size);
20719 if (ARM_DOUBLEWORD_ALIGN)
20721 /* Ensure SP remains doubleword aligned. */
20722 if (offsets->outgoing_args & 7)
20723 offsets->outgoing_args += 4;
20724 gcc_assert (!(offsets->outgoing_args & 7));
20727 return offsets;
20731 /* Calculate the relative offsets for the different stack pointers. Positive
20732 offsets are in the direction of stack growth. */
20734 HOST_WIDE_INT
20735 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20737 arm_stack_offsets *offsets;
20739 offsets = arm_get_frame_offsets ();
20741 /* OK, now we have enough information to compute the distances.
20742 There must be an entry in these switch tables for each pair
20743 of registers in ELIMINABLE_REGS, even if some of the entries
20744 seem to be redundant or useless. */
20745 switch (from)
20747 case ARG_POINTER_REGNUM:
20748 switch (to)
20750 case THUMB_HARD_FRAME_POINTER_REGNUM:
20751 return 0;
20753 case FRAME_POINTER_REGNUM:
20754 /* This is the reverse of the soft frame pointer
20755 to hard frame pointer elimination below. */
20756 return offsets->soft_frame - offsets->saved_args;
20758 case ARM_HARD_FRAME_POINTER_REGNUM:
20759 /* This is only non-zero in the case where the static chain register
20760 is stored above the frame. */
20761 return offsets->frame - offsets->saved_args - 4;
20763 case STACK_POINTER_REGNUM:
20764 /* If nothing has been pushed on the stack at all
20765 then this will return -4. This *is* correct! */
20766 return offsets->outgoing_args - (offsets->saved_args + 4);
20768 default:
20769 gcc_unreachable ();
20771 gcc_unreachable ();
20773 case FRAME_POINTER_REGNUM:
20774 switch (to)
20776 case THUMB_HARD_FRAME_POINTER_REGNUM:
20777 return 0;
20779 case ARM_HARD_FRAME_POINTER_REGNUM:
20780 /* The hard frame pointer points to the top entry in the
20781 stack frame. The soft frame pointer to the bottom entry
20782 in the stack frame. If there is no stack frame at all,
20783 then they are identical. */
20785 return offsets->frame - offsets->soft_frame;
20787 case STACK_POINTER_REGNUM:
20788 return offsets->outgoing_args - offsets->soft_frame;
20790 default:
20791 gcc_unreachable ();
20793 gcc_unreachable ();
20795 default:
20796 /* You cannot eliminate from the stack pointer.
20797 In theory you could eliminate from the hard frame
20798 pointer to the stack pointer, but this will never
20799 happen, since if a stack frame is not needed the
20800 hard frame pointer will never be used. */
20801 gcc_unreachable ();
20805 /* Given FROM and TO register numbers, say whether this elimination is
20806 allowed. Frame pointer elimination is automatically handled.
20808 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20809 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20810 pointer, we must eliminate FRAME_POINTER_REGNUM into
20811 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20812 ARG_POINTER_REGNUM. */
20814 bool
20815 arm_can_eliminate (const int from, const int to)
20817 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20818 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20819 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20820 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20821 true);
20824 /* Emit RTL to save coprocessor registers on function entry. Returns the
20825 number of bytes pushed. */
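/* For example (sketch only), a function that clobbers d8-d11 on a
   hard-float VFP target would get roughly "vstmdb sp!, {d8-d11}" emitted
   via vfp_emit_fstmd, contributing 32 bytes to the returned size; iWMMXt
   registers, if live and call-saved, are each pushed with an 8-byte
   pre-decrement store.  */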
20827 static int
20828 arm_save_coproc_regs(void)
20830 int saved_size = 0;
20831 unsigned reg;
20832 unsigned start_reg;
20833 rtx insn;
20835 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20836 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20838 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20839 insn = gen_rtx_MEM (V2SImode, insn);
20840 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20841 RTX_FRAME_RELATED_P (insn) = 1;
20842 saved_size += 8;
20845 if (TARGET_HARD_FLOAT && TARGET_VFP)
20847 start_reg = FIRST_VFP_REGNUM;
20849 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20851 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20852 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20854 if (start_reg != reg)
20855 saved_size += vfp_emit_fstmd (start_reg,
20856 (reg - start_reg) / 2);
20857 start_reg = reg + 2;
20860 if (start_reg != reg)
20861 saved_size += vfp_emit_fstmd (start_reg,
20862 (reg - start_reg) / 2);
20864 return saved_size;
20868 /* Set the Thumb frame pointer from the stack pointer. */
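/* Sketch: for a small frame this is a single "add r7, sp, #amount"
   (r7 being the Thumb frame pointer); for amounts of 1024 or more the
   constant is first moved into the frame pointer register and then sp
   is added to it.  */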
20870 static void
20871 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20873 HOST_WIDE_INT amount;
20874 rtx insn, dwarf;
20876 amount = offsets->outgoing_args - offsets->locals_base;
20877 if (amount < 1024)
20878 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20879 stack_pointer_rtx, GEN_INT (amount)));
20880 else
20882 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20883 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20884 expects the first two operands to be the same. */
20885 if (TARGET_THUMB2)
20887 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20888 stack_pointer_rtx,
20889 hard_frame_pointer_rtx));
20891 else
20893 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20894 hard_frame_pointer_rtx,
20895 stack_pointer_rtx));
20897 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20898 plus_constant (Pmode, stack_pointer_rtx, amount));
20899 RTX_FRAME_RELATED_P (dwarf) = 1;
20900 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20903 RTX_FRAME_RELATED_P (insn) = 1;
20906 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20907 function. */
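/* As a hedged illustration (details vary with ABI and options), a simple
   non-APCS ARM function with an 8-byte local frame might get a prologue
   along the lines of

       push  {r4, fp, lr}
       add   fp, sp, #8
       sub   sp, sp, #8

   assembled from emit_multi_reg_push, the frame-pointer set-up and the
   final stack adjustment below.  */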
20908 void
20909 arm_expand_prologue (void)
20911 rtx amount;
20912 rtx insn;
20913 rtx ip_rtx;
20914 unsigned long live_regs_mask;
20915 unsigned long func_type;
20916 int fp_offset = 0;
20917 int saved_pretend_args = 0;
20918 int saved_regs = 0;
20919 unsigned HOST_WIDE_INT args_to_push;
20920 arm_stack_offsets *offsets;
20922 func_type = arm_current_func_type ();
20924 /* Naked functions don't have prologues. */
20925 if (IS_NAKED (func_type))
20926 return;
20928 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20929 args_to_push = crtl->args.pretend_args_size;
20931 /* Compute which register we will have to save onto the stack. */
20932 offsets = arm_get_frame_offsets ();
20933 live_regs_mask = offsets->saved_regs_mask;
20935 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20937 if (IS_STACKALIGN (func_type))
20939 rtx r0, r1;
20941 /* Handle a word-aligned stack pointer. We generate the following:
20943 mov r0, sp
20944 bic r1, r0, #7
20945 mov sp, r1
20946 <save and restore r0 in normal prologue/epilogue>
20947 mov sp, r0
20948 bx lr
20950 The unwinder doesn't need to know about the stack realignment.
20951 Just tell it we saved SP in r0. */
20952 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20954 r0 = gen_rtx_REG (SImode, 0);
20955 r1 = gen_rtx_REG (SImode, 1);
20957 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20958 RTX_FRAME_RELATED_P (insn) = 1;
20959 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20961 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20963 /* ??? The CFA changes here, which may cause GDB to conclude that it
20964 has entered a different function. That said, the unwind info is
20965 correct, individually, before and after this instruction because
20966 we've described the save of SP, which will override the default
20967 handling of SP as restoring from the CFA. */
20968 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20971 /* For APCS frames, if IP register is clobbered
20972 when creating frame, save that register in a special
20973 way. */
20974 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20976 if (IS_INTERRUPT (func_type))
20978 /* Interrupt functions must not corrupt any registers.
20979 Creating a frame pointer however, corrupts the IP
20980 register, so we must push it first. */
20981 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20983 /* Do not set RTX_FRAME_RELATED_P on this insn.
20984 The dwarf stack unwinding code only wants to see one
20985 stack decrement per function, and this is not it. If
20986 this instruction is labeled as being part of the frame
20987 creation sequence then dwarf2out_frame_debug_expr will
20988 die when it encounters the assignment of IP to FP
20989 later on, since the use of SP here establishes SP as
20990 the CFA register and not IP.
20992 Anyway this instruction is not really part of the stack
20993 frame creation although it is part of the prologue. */
20995 else if (IS_NESTED (func_type))
20997 /* The static chain register is the same as the IP register
20998 used as a scratch register during stack frame creation.
20999 To get around this we need to find somewhere to store IP
21000 whilst the frame is being created. We try the following
21001 places in order:
21003 1. The last argument register r3 if it is available.
21004 2. A slot on the stack above the frame if there are no
21005 arguments to push onto the stack.
21006 3. Register r3 again, after pushing the argument registers
21007 onto the stack, if this is a varargs function.
21008 4. The last slot on the stack created for the arguments to
21009 push, if this isn't a varargs function.
21011 Note - we only need to tell the dwarf2 backend about the SP
21012 adjustment in the second variant; the static chain register
21013 doesn't need to be unwound, as it doesn't contain a value
21014 inherited from the caller. */
21016 if (!arm_r3_live_at_start_p ())
21017 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21018 else if (args_to_push == 0)
21020 rtx addr, dwarf;
21022 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21023 saved_regs += 4;
21025 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21026 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21027 fp_offset = 4;
21029 /* Just tell the dwarf backend that we adjusted SP. */
21030 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21031 plus_constant (Pmode, stack_pointer_rtx,
21032 -fp_offset));
21033 RTX_FRAME_RELATED_P (insn) = 1;
21034 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21036 else
21038 /* Store the args on the stack. */
21039 if (cfun->machine->uses_anonymous_args)
21041 insn
21042 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21043 (0xf0 >> (args_to_push / 4)) & 0xf);
21044 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21045 saved_pretend_args = 1;
21047 else
21049 rtx addr, dwarf;
21051 if (args_to_push == 4)
21052 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21053 else
21054 addr
21055 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21056 plus_constant (Pmode,
21057 stack_pointer_rtx,
21058 -args_to_push));
21060 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21062 /* Just tell the dwarf backend that we adjusted SP. */
21063 dwarf
21064 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21065 plus_constant (Pmode, stack_pointer_rtx,
21066 -args_to_push));
21067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21070 RTX_FRAME_RELATED_P (insn) = 1;
21071 fp_offset = args_to_push;
21072 args_to_push = 0;
21076 insn = emit_set_insn (ip_rtx,
21077 plus_constant (Pmode, stack_pointer_rtx,
21078 fp_offset));
21079 RTX_FRAME_RELATED_P (insn) = 1;
21082 if (args_to_push)
21084 /* Push the argument registers, or reserve space for them. */
21085 if (cfun->machine->uses_anonymous_args)
21086 insn = emit_multi_reg_push
21087 ((0xf0 >> (args_to_push / 4)) & 0xf,
21088 (0xf0 >> (args_to_push / 4)) & 0xf);
21089 else
21090 insn = emit_insn
21091 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21092 GEN_INT (- args_to_push)));
21093 RTX_FRAME_RELATED_P (insn) = 1;
21096 /* If this is an interrupt service routine, and the link register
21097 is going to be pushed, and we're not generating an extra
21098 push of IP (needed when a frame is needed and the frame layout is APCS),
21099 subtracting four from LR now will mean that the function return
21100 can be done with a single instruction. */
21101 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21102 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21103 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21104 && TARGET_ARM)
21106 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21108 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21111 if (live_regs_mask)
21113 unsigned long dwarf_regs_mask = live_regs_mask;
21115 saved_regs += bit_count (live_regs_mask) * 4;
21116 if (optimize_size && !frame_pointer_needed
21117 && saved_regs == offsets->saved_regs - offsets->saved_args)
21119 /* If no coprocessor registers are being pushed and we don't have
21120 to worry about a frame pointer then push extra registers to
21121 create the stack frame. This is done in a way that does not
21122 alter the frame layout, so it is independent of the epilogue. */
21123 int n;
21124 int frame;
21125 n = 0;
21126 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21127 n++;
21128 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21129 if (frame && n * 4 >= frame)
21131 n = frame / 4;
21132 live_regs_mask |= (1 << n) - 1;
21133 saved_regs += frame;
21137 if (TARGET_LDRD
21138 && current_tune->prefer_ldrd_strd
21139 && !optimize_function_for_size_p (cfun))
21141 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21142 if (TARGET_THUMB2)
21143 thumb2_emit_strd_push (live_regs_mask);
21144 else if (TARGET_ARM
21145 && !TARGET_APCS_FRAME
21146 && !IS_INTERRUPT (func_type))
21147 arm_emit_strd_push (live_regs_mask);
21148 else
21150 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21151 RTX_FRAME_RELATED_P (insn) = 1;
21154 else
21156 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21157 RTX_FRAME_RELATED_P (insn) = 1;
21161 if (! IS_VOLATILE (func_type))
21162 saved_regs += arm_save_coproc_regs ();
21164 if (frame_pointer_needed && TARGET_ARM)
21166 /* Create the new frame pointer. */
21167 if (TARGET_APCS_FRAME)
21169 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21170 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21171 RTX_FRAME_RELATED_P (insn) = 1;
21173 if (IS_NESTED (func_type))
21175 /* Recover the static chain register. */
21176 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21177 insn = gen_rtx_REG (SImode, 3);
21178 else
21180 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21181 insn = gen_frame_mem (SImode, insn);
21183 emit_set_insn (ip_rtx, insn);
21184 /* Add a USE to stop propagate_one_insn() from barfing. */
21185 emit_insn (gen_force_register_use (ip_rtx));
21188 else
21190 insn = GEN_INT (saved_regs - 4);
21191 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21192 stack_pointer_rtx, insn));
21193 RTX_FRAME_RELATED_P (insn) = 1;
21197 if (flag_stack_usage_info)
21198 current_function_static_stack_size
21199 = offsets->outgoing_args - offsets->saved_args;
21201 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21203 /* This add can produce multiple insns for a large constant, so we
21204 need to get tricky. */
21205 rtx last = get_last_insn ();
21207 amount = GEN_INT (offsets->saved_args + saved_regs
21208 - offsets->outgoing_args);
21210 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21211 amount));
21214 last = last ? NEXT_INSN (last) : get_insns ();
21215 RTX_FRAME_RELATED_P (last) = 1;
21217 while (last != insn);
21219 /* If the frame pointer is needed, emit a special barrier that
21220 will prevent the scheduler from moving stores to the frame
21221 before the stack adjustment. */
21222 if (frame_pointer_needed)
21223 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21224 hard_frame_pointer_rtx));
21228 if (frame_pointer_needed && TARGET_THUMB2)
21229 thumb_set_frame_pointer (offsets);
21231 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21233 unsigned long mask;
21235 mask = live_regs_mask;
21236 mask &= THUMB2_WORK_REGS;
21237 if (!IS_NESTED (func_type))
21238 mask |= (1 << IP_REGNUM);
21239 arm_load_pic_register (mask);
21242 /* If we are profiling, make sure no instructions are scheduled before
21243 the call to mcount. Similarly if the user has requested no
21244 scheduling in the prolog. Similarly if we want non-call exceptions
21245 using the EABI unwinder, to prevent faulting instructions from being
21246 swapped with a stack adjustment. */
21247 if (crtl->profile || !TARGET_SCHED_PROLOG
21248 || (arm_except_unwind_info (&global_options) == UI_TARGET
21249 && cfun->can_throw_non_call_exceptions))
21250 emit_insn (gen_blockage ());
21252 /* If the link register is being kept alive, with the return address in it,
21253 then make sure that it does not get reused by the ce2 pass. */
21254 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21255 cfun->machine->lr_save_eliminated = 1;
21258 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21259 static void
21260 arm_print_condition (FILE *stream)
21262 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21264 /* Branch conversion is not implemented for Thumb-2. */
21265 if (TARGET_THUMB)
21267 output_operand_lossage ("predicated Thumb instruction");
21268 return;
21270 if (current_insn_predicate != NULL)
21272 output_operand_lossage
21273 ("predicated instruction in conditional sequence");
21274 return;
21277 fputs (arm_condition_codes[arm_current_cc], stream);
21279 else if (current_insn_predicate)
21281 enum arm_cond_code code;
21283 if (TARGET_THUMB1)
21285 output_operand_lossage ("predicated Thumb instruction");
21286 return;
21289 code = get_arm_condition_code (current_insn_predicate);
21290 fputs (arm_condition_codes[code], stream);
21295 /* Globally reserved letters: acln
21296 Punctuation letters currently used: @_|?().!#
21297 Lower case letters currently used: bcdefhimpqtvwxyz
21298 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21299 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21301 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21303 If CODE is 'd', then X is a condition operand and the instruction
21304 should only be executed if the condition is true.
21305 If CODE is 'D', then X is a condition operand and the instruction
21306 should only be executed if the condition is false: however, if the mode
21307 of the comparison is CCFPEmode, then always execute the instruction -- we
21308 do this because in these circumstances !GE does not necessarily imply LT;
21309 in these cases the instruction pattern will take care to make sure that
21310 an instruction containing %d will follow, thereby undoing the effects of
21311 doing this instruction unconditionally.
21312 If CODE is 'N' then X is a floating point operand that must be negated
21313 before output.
21314 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21315 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
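/* A few concrete (illustrative) cases handled below: for x = (const_int 10),
   '%B' prints -11 (the bitwise inverse), '%x' prints #0xa and '%L' prints
   10; for x = (const_int 8), '%b' prints #3 (its log2).  */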
21316 static void
21317 arm_print_operand (FILE *stream, rtx x, int code)
21319 switch (code)
21321 case '@':
21322 fputs (ASM_COMMENT_START, stream);
21323 return;
21325 case '_':
21326 fputs (user_label_prefix, stream);
21327 return;
21329 case '|':
21330 fputs (REGISTER_PREFIX, stream);
21331 return;
21333 case '?':
21334 arm_print_condition (stream);
21335 return;
21337 case '(':
21338 /* Nothing in unified syntax, otherwise the current condition code. */
21339 if (!TARGET_UNIFIED_ASM)
21340 arm_print_condition (stream);
21341 break;
21343 case ')':
21344 /* The current condition code in unified syntax, otherwise nothing. */
21345 if (TARGET_UNIFIED_ASM)
21346 arm_print_condition (stream);
21347 break;
21349 case '.':
21350 /* The current condition code for a condition code setting instruction.
21351 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21352 if (TARGET_UNIFIED_ASM)
21354 fputc('s', stream);
21355 arm_print_condition (stream);
21357 else
21359 arm_print_condition (stream);
21360 fputc('s', stream);
21362 return;
21364 case '!':
21365 /* If the instruction is conditionally executed then print
21366 the current condition code, otherwise print 's'. */
21367 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21368 if (current_insn_predicate)
21369 arm_print_condition (stream);
21370 else
21371 fputc('s', stream);
21372 break;
21374 /* %# is a "break" sequence. It doesn't output anything, but is used to
21375 separate e.g. operand numbers from following text, if that text consists
21376 of further digits which we don't want to be part of the operand
21377 number. */
21378 case '#':
21379 return;
21381 case 'N':
21383 REAL_VALUE_TYPE r;
21384 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21385 r = real_value_negate (&r);
21386 fprintf (stream, "%s", fp_const_from_val (&r));
21388 return;
21390 /* An integer or symbol address without a preceding # sign. */
21391 case 'c':
21392 switch (GET_CODE (x))
21394 case CONST_INT:
21395 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21396 break;
21398 case SYMBOL_REF:
21399 output_addr_const (stream, x);
21400 break;
21402 case CONST:
21403 if (GET_CODE (XEXP (x, 0)) == PLUS
21404 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21406 output_addr_const (stream, x);
21407 break;
21409 /* Fall through. */
21411 default:
21412 output_operand_lossage ("Unsupported operand for code '%c'", code);
21414 return;
21416 /* An integer that we want to print in HEX. */
21417 case 'x':
21418 switch (GET_CODE (x))
21420 case CONST_INT:
21421 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21422 break;
21424 default:
21425 output_operand_lossage ("Unsupported operand for code '%c'", code);
21427 return;
21429 case 'B':
21430 if (CONST_INT_P (x))
21432 HOST_WIDE_INT val;
21433 val = ARM_SIGN_EXTEND (~INTVAL (x));
21434 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21436 else
21438 putc ('~', stream);
21439 output_addr_const (stream, x);
21441 return;
21443 case 'b':
21444 /* Print the log2 of a CONST_INT. */
21446 HOST_WIDE_INT val;
21448 if (!CONST_INT_P (x)
21449 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21450 output_operand_lossage ("Unsupported operand for code '%c'", code);
21451 else
21452 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21454 return;
21456 case 'L':
21457 /* The low 16 bits of an immediate constant. */
21458 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21459 return;
21461 case 'i':
21462 fprintf (stream, "%s", arithmetic_instr (x, 1));
21463 return;
21465 case 'I':
21466 fprintf (stream, "%s", arithmetic_instr (x, 0));
21467 return;
21469 case 'S':
21471 HOST_WIDE_INT val;
21472 const char *shift;
21474 shift = shift_op (x, &val);
21476 if (shift)
21478 fprintf (stream, ", %s ", shift);
21479 if (val == -1)
21480 arm_print_operand (stream, XEXP (x, 1), 0);
21481 else
21482 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21485 return;
21487 /* An explanation of the 'Q', 'R' and 'H' register operands:
21489 In a pair of registers containing a DI or DF value the 'Q'
21490 operand returns the register number of the register containing
21491 the least significant part of the value. The 'R' operand returns
21492 the register number of the register containing the most
21493 significant part of the value.
21495 The 'H' operand returns the higher of the two register numbers.
21496 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21497 same as the 'Q' operand, since the most significant part of the
21498 value is held in the lower number register. The reverse is true
21499 on systems where WORDS_BIG_ENDIAN is false.
21501 The purpose of these operands is to distinguish between cases
21502 where the endian-ness of the values is important (for example
21503 when they are added together), and cases where the endian-ness
21504 is irrelevant, but the order of register operations is important.
21505 For example when loading a value from memory into a register
21506 pair, the endian-ness does not matter. Provided that the value
21507 from the lower memory address is put into the lower numbered
21508 register, and the value from the higher address is put into the
21509 higher numbered register, the load will work regardless of whether
21510 the value being loaded is big-wordian or little-wordian. The
21511 order of the two register loads can matter however, if the address
21512 of the memory location is actually held in one of the registers
21513 being overwritten by the load.
21515 The 'Q' and 'R' constraints are also available for 64-bit
21516 constants. */
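/* Illustrative example (not in the original source): with WORDS_BIG_ENDIAN
   false and a DImode value held in the pair r2/r3, "%Q0" prints r2 (the
   least significant word), "%R0" prints r3 (the most significant word) and
   "%H0" prints r3 (the higher register number).  With WORDS_BIG_ENDIAN
   true, %Q and %R swap roles while %H still prints r3.  */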
21517 case 'Q':
21518 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21520 rtx part = gen_lowpart (SImode, x);
21521 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21522 return;
21525 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21527 output_operand_lossage ("invalid operand for code '%c'", code);
21528 return;
21531 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21532 return;
21534 case 'R':
21535 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21537 enum machine_mode mode = GET_MODE (x);
21538 rtx part;
21540 if (mode == VOIDmode)
21541 mode = DImode;
21542 part = gen_highpart_mode (SImode, mode, x);
21543 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21544 return;
21547 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21549 output_operand_lossage ("invalid operand for code '%c'", code);
21550 return;
21553 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21554 return;
21556 case 'H':
21557 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21559 output_operand_lossage ("invalid operand for code '%c'", code);
21560 return;
21563 asm_fprintf (stream, "%r", REGNO (x) + 1);
21564 return;
21566 case 'J':
21567 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21569 output_operand_lossage ("invalid operand for code '%c'", code);
21570 return;
21573 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21574 return;
21576 case 'K':
21577 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21579 output_operand_lossage ("invalid operand for code '%c'", code);
21580 return;
21583 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21584 return;
21586 case 'm':
21587 asm_fprintf (stream, "%r",
21588 REG_P (XEXP (x, 0))
21589 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21590 return;
21592 case 'M':
21593 asm_fprintf (stream, "{%r-%r}",
21594 REGNO (x),
21595 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21596 return;
21598 /* Like 'M', but writing doubleword vector registers, for use by Neon
21599 insns. */
21600 case 'h':
21602 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21603 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21604 if (numregs == 1)
21605 asm_fprintf (stream, "{d%d}", regno);
21606 else
21607 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21609 return;
21611 case 'd':
21612 /* CONST_TRUE_RTX means always -- that's the default. */
21613 if (x == const_true_rtx)
21614 return;
21616 if (!COMPARISON_P (x))
21618 output_operand_lossage ("invalid operand for code '%c'", code);
21619 return;
21622 fputs (arm_condition_codes[get_arm_condition_code (x)],
21623 stream);
21624 return;
21626 case 'D':
21627 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21628 want to do that. */
21629 if (x == const_true_rtx)
21631 output_operand_lossage ("instruction never executed");
21632 return;
21634 if (!COMPARISON_P (x))
21636 output_operand_lossage ("invalid operand for code '%c'", code);
21637 return;
21640 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21641 (get_arm_condition_code (x))],
21642 stream);
21643 return;
21645 case 's':
21646 case 'V':
21647 case 'W':
21648 case 'X':
21649 case 'Y':
21650 case 'Z':
21651 /* Former Maverick support, removed after GCC-4.7. */
21652 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21653 return;
21655 case 'U':
21656 if (!REG_P (x)
21657 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21658 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21659 /* Bad value for wCG register number. */
21661 output_operand_lossage ("invalid operand for code '%c'", code);
21662 return;
21665 else
21666 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21667 return;
21669 /* Print an iWMMXt control register name. */
21670 case 'w':
21671 if (!CONST_INT_P (x)
21672 || INTVAL (x) < 0
21673 || INTVAL (x) >= 16)
21674 /* Bad value for wC register number. */
21676 output_operand_lossage ("invalid operand for code '%c'", code);
21677 return;
21680 else
21682 static const char * wc_reg_names [16] =
21684 "wCID", "wCon", "wCSSF", "wCASF",
21685 "wC4", "wC5", "wC6", "wC7",
21686 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21687 "wC12", "wC13", "wC14", "wC15"
21690 fputs (wc_reg_names [INTVAL (x)], stream);
21692 return;
21694 /* Print the high single-precision register of a VFP double-precision
21695 register. */
21696 case 'p':
21698 enum machine_mode mode = GET_MODE (x);
21699 int regno;
21701 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21703 output_operand_lossage ("invalid operand for code '%c'", code);
21704 return;
21707 regno = REGNO (x);
21708 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21710 output_operand_lossage ("invalid operand for code '%c'", code);
21711 return;
21714 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21716 return;
21718 /* Print a VFP/Neon double precision or quad precision register name. */
21719 case 'P':
21720 case 'q':
21722 enum machine_mode mode = GET_MODE (x);
21723 int is_quad = (code == 'q');
21724 int regno;
21726 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21728 output_operand_lossage ("invalid operand for code '%c'", code);
21729 return;
21732 if (!REG_P (x)
21733 || !IS_VFP_REGNUM (REGNO (x)))
21735 output_operand_lossage ("invalid operand for code '%c'", code);
21736 return;
21739 regno = REGNO (x);
21740 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21741 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21743 output_operand_lossage ("invalid operand for code '%c'", code);
21744 return;
21747 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21748 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21750 return;
21752 /* These two codes print the low/high doubleword register of a Neon quad
21753 register, respectively. For pair-structure types, can also print
21754 low/high quadword registers. */
21755 case 'e':
21756 case 'f':
21758 enum machine_mode mode = GET_MODE (x);
21759 int regno;
21761 if ((GET_MODE_SIZE (mode) != 16
21762 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21764 output_operand_lossage ("invalid operand for code '%c'", code);
21765 return;
21768 regno = REGNO (x);
21769 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21771 output_operand_lossage ("invalid operand for code '%c'", code);
21772 return;
21775 if (GET_MODE_SIZE (mode) == 16)
21776 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21777 + (code == 'f' ? 1 : 0));
21778 else
21779 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21780 + (code == 'f' ? 1 : 0));
21782 return;
21784 /* Print a VFPv3 floating-point constant, represented as an integer
21785 index. */
21786 case 'G':
21788 int index = vfp3_const_double_index (x);
21789 gcc_assert (index != -1);
21790 fprintf (stream, "%d", index);
21792 return;
21794 /* Print bits representing opcode features for Neon.
21796 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21797 and polynomials as unsigned.
21799 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21801 Bit 2 is 1 for rounding functions, 0 otherwise. */
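/* Illustrative: a bits value of 3 (binary 011) is a float, so %T and %F
   both print 'f'; a value of 1 is a plain signed integer, printing 's' for
   %T and 'i' for %F; setting bit 2 as well (e.g. 5) makes %O print 'r' for
   the rounding variant.  */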
21803 /* Identify the type as 's', 'u', 'p' or 'f'. */
21804 case 'T':
21806 HOST_WIDE_INT bits = INTVAL (x);
21807 fputc ("uspf"[bits & 3], stream);
21809 return;
21811 /* Likewise, but signed and unsigned integers are both 'i'. */
21812 case 'F':
21814 HOST_WIDE_INT bits = INTVAL (x);
21815 fputc ("iipf"[bits & 3], stream);
21817 return;
21819 /* As for 'T', but emit 'u' instead of 'p'. */
21820 case 't':
21822 HOST_WIDE_INT bits = INTVAL (x);
21823 fputc ("usuf"[bits & 3], stream);
21825 return;
21827 /* Bit 2: rounding (vs none). */
21828 case 'O':
21830 HOST_WIDE_INT bits = INTVAL (x);
21831 fputs ((bits & 4) != 0 ? "r" : "", stream);
21833 return;
21835 /* Memory operand for vld1/vst1 instruction. */
21836 case 'A':
21838 rtx addr;
21839 bool postinc = FALSE;
21840 rtx postinc_reg = NULL;
21841 unsigned align, memsize, align_bits;
21843 gcc_assert (MEM_P (x));
21844 addr = XEXP (x, 0);
21845 if (GET_CODE (addr) == POST_INC)
21847 postinc = 1;
21848 addr = XEXP (addr, 0);
21850 if (GET_CODE (addr) == POST_MODIFY)
21852 postinc_reg = XEXP( XEXP (addr, 1), 1);
21853 addr = XEXP (addr, 0);
21855 asm_fprintf (stream, "[%r", REGNO (addr));
21857 /* We know the alignment of this access, so we can emit a hint in the
21858 instruction (for some alignments) as an aid to the memory subsystem
21859 of the target. */
21860 align = MEM_ALIGN (x) >> 3;
21861 memsize = MEM_SIZE (x);
21863 /* Only certain alignment specifiers are supported by the hardware. */
21864 if (memsize == 32 && (align % 32) == 0)
21865 align_bits = 256;
21866 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21867 align_bits = 128;
21868 else if (memsize >= 8 && (align % 8) == 0)
21869 align_bits = 64;
21870 else
21871 align_bits = 0;
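/* Illustrative: a 16-byte access from a 16-byte aligned address gets the
   ":128" hint, while an 8-byte access with only 4-byte alignment gets no
   hint at all.  */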
21873 if (align_bits != 0)
21874 asm_fprintf (stream, ":%d", align_bits);
21876 asm_fprintf (stream, "]");
21878 if (postinc)
21879 fputs("!", stream);
21880 if (postinc_reg)
21881 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21883 return;
21885 case 'C':
21887 rtx addr;
21889 gcc_assert (MEM_P (x));
21890 addr = XEXP (x, 0);
21891 gcc_assert (REG_P (addr));
21892 asm_fprintf (stream, "[%r]", REGNO (addr));
21894 return;
21896 /* Translate an S register number into a D register number and element index. */
21897 case 'y':
21899 enum machine_mode mode = GET_MODE (x);
21900 int regno;
21902 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21904 output_operand_lossage ("invalid operand for code '%c'", code);
21905 return;
21908 regno = REGNO (x);
21909 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21911 output_operand_lossage ("invalid operand for code '%c'", code);
21912 return;
21915 regno = regno - FIRST_VFP_REGNUM;
21916 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21918 return;
21920 case 'v':
21921 gcc_assert (CONST_DOUBLE_P (x));
21922 int result;
21923 result = vfp3_const_double_for_fract_bits (x);
21924 if (result == 0)
21925 result = vfp3_const_double_for_bits (x);
21926 fprintf (stream, "#%d", result);
21927 return;
21929 /* Register specifier for vld1.16/vst1.16. Translate the S register
21930 number into a D register number and element index. */
21931 case 'z':
21933 enum machine_mode mode = GET_MODE (x);
21934 int regno;
21936 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21938 output_operand_lossage ("invalid operand for code '%c'", code);
21939 return;
21942 regno = REGNO (x);
21943 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21945 output_operand_lossage ("invalid operand for code '%c'", code);
21946 return;
21949 regno = regno - FIRST_VFP_REGNUM;
21950 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21952 return;
21954 default:
21955 if (x == 0)
21957 output_operand_lossage ("missing operand");
21958 return;
21961 switch (GET_CODE (x))
21963 case REG:
21964 asm_fprintf (stream, "%r", REGNO (x));
21965 break;
21967 case MEM:
21968 output_memory_reference_mode = GET_MODE (x);
21969 output_address (XEXP (x, 0));
21970 break;
21972 case CONST_DOUBLE:
21973 if (TARGET_NEON)
21975 char fpstr[20];
21976 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21977 sizeof (fpstr), 0, 1);
21978 fprintf (stream, "#%s", fpstr);
21980 else
21981 fprintf (stream, "#%s", fp_immediate_constant (x));
21982 break;
21984 default:
21985 gcc_assert (GET_CODE (x) != NEG);
21986 fputc ('#', stream);
21987 if (GET_CODE (x) == HIGH)
21989 fputs (":lower16:", stream);
21990 x = XEXP (x, 0);
21993 output_addr_const (stream, x);
21994 break;
21999 /* Target hook for printing a memory address. */
22000 static void
22001 arm_print_operand_address (FILE *stream, rtx x)
22003 if (TARGET_32BIT)
22005 int is_minus = GET_CODE (x) == MINUS;
22007 if (REG_P (x))
22008 asm_fprintf (stream, "[%r]", REGNO (x));
22009 else if (GET_CODE (x) == PLUS || is_minus)
22011 rtx base = XEXP (x, 0);
22012 rtx index = XEXP (x, 1);
22013 HOST_WIDE_INT offset = 0;
22014 if (!REG_P (base)
22015 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22017 /* Ensure that BASE is a register. */
22018 /* (one of them must be). */
22019 /* Also ensure the SP is not used as an index register. */
22020 rtx temp = base;
22021 base = index;
22022 index = temp;
22024 switch (GET_CODE (index))
22026 case CONST_INT:
22027 offset = INTVAL (index);
22028 if (is_minus)
22029 offset = -offset;
22030 asm_fprintf (stream, "[%r, #%wd]",
22031 REGNO (base), offset);
22032 break;
22034 case REG:
22035 asm_fprintf (stream, "[%r, %s%r]",
22036 REGNO (base), is_minus ? "-" : "",
22037 REGNO (index));
22038 break;
22040 case MULT:
22041 case ASHIFTRT:
22042 case LSHIFTRT:
22043 case ASHIFT:
22044 case ROTATERT:
22046 asm_fprintf (stream, "[%r, %s%r",
22047 REGNO (base), is_minus ? "-" : "",
22048 REGNO (XEXP (index, 0)));
22049 arm_print_operand (stream, index, 'S');
22050 fputs ("]", stream);
22051 break;
22054 default:
22055 gcc_unreachable ();
22058 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22059 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22061 extern enum machine_mode output_memory_reference_mode;
22063 gcc_assert (REG_P (XEXP (x, 0)));
22065 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22066 asm_fprintf (stream, "[%r, #%s%d]!",
22067 REGNO (XEXP (x, 0)),
22068 GET_CODE (x) == PRE_DEC ? "-" : "",
22069 GET_MODE_SIZE (output_memory_reference_mode));
22070 else
22071 asm_fprintf (stream, "[%r], #%s%d",
22072 REGNO (XEXP (x, 0)),
22073 GET_CODE (x) == POST_DEC ? "-" : "",
22074 GET_MODE_SIZE (output_memory_reference_mode));
22076 else if (GET_CODE (x) == PRE_MODIFY)
22078 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22079 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22080 asm_fprintf (stream, "#%wd]!",
22081 INTVAL (XEXP (XEXP (x, 1), 1)));
22082 else
22083 asm_fprintf (stream, "%r]!",
22084 REGNO (XEXP (XEXP (x, 1), 1)));
22086 else if (GET_CODE (x) == POST_MODIFY)
22088 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22089 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22090 asm_fprintf (stream, "#%wd",
22091 INTVAL (XEXP (XEXP (x, 1), 1)));
22092 else
22093 asm_fprintf (stream, "%r",
22094 REGNO (XEXP (XEXP (x, 1), 1)));
22096 else output_addr_const (stream, x);
22098 else
22100 if (REG_P (x))
22101 asm_fprintf (stream, "[%r]", REGNO (x));
22102 else if (GET_CODE (x) == POST_INC)
22103 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22104 else if (GET_CODE (x) == PLUS)
22106 gcc_assert (REG_P (XEXP (x, 0)));
22107 if (CONST_INT_P (XEXP (x, 1)))
22108 asm_fprintf (stream, "[%r, #%wd]",
22109 REGNO (XEXP (x, 0)),
22110 INTVAL (XEXP (x, 1)));
22111 else
22112 asm_fprintf (stream, "[%r, %r]",
22113 REGNO (XEXP (x, 0)),
22114 REGNO (XEXP (x, 1)));
22116 else
22117 output_addr_const (stream, x);
22121 /* Target hook for indicating whether a punctuation character for
22122 TARGET_PRINT_OPERAND is valid. */
22123 static bool
22124 arm_print_operand_punct_valid_p (unsigned char code)
22126 return (code == '@' || code == '|' || code == '.'
22127 || code == '(' || code == ')' || code == '#'
22128 || (TARGET_32BIT && (code == '?'))
22129 || (TARGET_THUMB2 && (code == '!'))
22130 || (TARGET_THUMB && (code == '_')));
22133 /* Target hook for assembling integer objects. The ARM version needs to
22134 handle word-sized values specially. */
22135 static bool
22136 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22138 enum machine_mode mode;
22140 if (size == UNITS_PER_WORD && aligned_p)
22142 fputs ("\t.word\t", asm_out_file);
22143 output_addr_const (asm_out_file, x);
22145 /* Mark symbols as position independent. We only do this in the
22146 .text segment, not in the .data segment. */
22147 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22148 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22150 /* See legitimize_pic_address for an explanation of the
22151 TARGET_VXWORKS_RTP check. */
22152 if (!arm_pic_data_is_text_relative
22153 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22154 fputs ("(GOT)", asm_out_file);
22155 else
22156 fputs ("(GOTOFF)", asm_out_file);
22158 fputc ('\n', asm_out_file);
22159 return true;
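/* Illustrative: under -fPIC a word-sized reference to a non-local symbol in
   the constant table comes out as "\t.word\tfoo(GOT)", while a local,
   text-relative reference uses "(GOTOFF)" instead.  */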
22162 mode = GET_MODE (x);
22164 if (arm_vector_mode_supported_p (mode))
22166 int i, units;
22168 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22170 units = CONST_VECTOR_NUNITS (x);
22171 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22173 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22174 for (i = 0; i < units; i++)
22176 rtx elt = CONST_VECTOR_ELT (x, i);
22177 assemble_integer
22178 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22180 else
22181 for (i = 0; i < units; i++)
22183 rtx elt = CONST_VECTOR_ELT (x, i);
22184 REAL_VALUE_TYPE rval;
22186 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22188 assemble_real
22189 (rval, GET_MODE_INNER (mode),
22190 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22193 return true;
22196 return default_assemble_integer (x, size, aligned_p);
22199 static void
22200 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22202 section *s;
22204 if (!TARGET_AAPCS_BASED)
22206 (is_ctor ?
22207 default_named_section_asm_out_constructor
22208 : default_named_section_asm_out_destructor) (symbol, priority);
22209 return;
22212 /* Put these in the .init_array section, using a special relocation. */
22213 if (priority != DEFAULT_INIT_PRIORITY)
22215 char buf[18];
22216 sprintf (buf, "%s.%.5u",
22217 is_ctor ? ".init_array" : ".fini_array",
22218 priority);
22219 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22221 else if (is_ctor)
22222 s = ctors_section;
22223 else
22224 s = dtors_section;
22226 switch_to_section (s);
22227 assemble_align (POINTER_SIZE);
22228 fputs ("\t.word\t", asm_out_file);
22229 output_addr_const (asm_out_file, symbol);
22230 fputs ("(target1)\n", asm_out_file);
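/* Illustrative: for priority 101 the section name built above is
   ".init_array.00101" (".fini_array.00101" for a destructor), and each
   entry is emitted as "\t.word\tsymbol(target1)".  */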
22233 /* Add a function to the list of static constructors. */
22235 static void
22236 arm_elf_asm_constructor (rtx symbol, int priority)
22238 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22241 /* Add a function to the list of static destructors. */
22243 static void
22244 arm_elf_asm_destructor (rtx symbol, int priority)
22246 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22249 /* A finite state machine takes care of noticing whether or not instructions
22250 can be conditionally executed, and thus decreases execution time and code
22251 size by deleting branch instructions. The fsm is controlled by
22252 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22254 /* The state of the fsm controlling condition codes are:
22255 0: normal, do nothing special
22256 1: make ASM_OUTPUT_OPCODE not output this instruction
22257 2: make ASM_OUTPUT_OPCODE not output this instruction
22258 3: make instructions conditional
22259 4: make instructions conditional
22261 State transitions (state->state by whom under condition):
22262 0 -> 1 final_prescan_insn if the `target' is a label
22263 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22264 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22265 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22266 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22267 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22268 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22269 (the target insn is arm_target_insn).
22271 If the jump clobbers the conditions then we use states 2 and 4.
22273 A similar thing can be done with conditional return insns.
22275 XXX In case the `target' is an unconditional branch, this conditionalising
22276 of the instructions always reduces code size, but not always execution
22277 time. But then, I want to reduce the code size to somewhere near what
22278 /bin/cc produces. */
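/* Illustrative example (not in the original source): a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   has its branch suppressed and the skipped instruction emitted as
   "addne r1, r1, #1" instead, provided no more than max_insns_skipped
   instructions would be skipped.  */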
22280 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22281 instructions. When a COND_EXEC instruction is seen the subsequent
22282 instructions are scanned so that multiple conditional instructions can be
22283 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22284 specify the length and true/false mask for the IT block. These will be
22285 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
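/* Illustrative: three consecutive COND_EXEC insns predicated on EQ, EQ and
   NE give arm_condexec_masklen == 3 and arm_condexec_mask == 3 (binary 011),
   which thumb2_asm_output_opcode below prints as "itte eq" ahead of the
   first instruction of the block.  */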
22287 /* Returns the index of the ARM condition code string in
22288 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22289 COMPARISON should be an rtx like `(eq (...) (...))'. */
22291 enum arm_cond_code
22292 maybe_get_arm_condition_code (rtx comparison)
22294 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22295 enum arm_cond_code code;
22296 enum rtx_code comp_code = GET_CODE (comparison);
22298 if (GET_MODE_CLASS (mode) != MODE_CC)
22299 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22300 XEXP (comparison, 1));
22302 switch (mode)
22304 case CC_DNEmode: code = ARM_NE; goto dominance;
22305 case CC_DEQmode: code = ARM_EQ; goto dominance;
22306 case CC_DGEmode: code = ARM_GE; goto dominance;
22307 case CC_DGTmode: code = ARM_GT; goto dominance;
22308 case CC_DLEmode: code = ARM_LE; goto dominance;
22309 case CC_DLTmode: code = ARM_LT; goto dominance;
22310 case CC_DGEUmode: code = ARM_CS; goto dominance;
22311 case CC_DGTUmode: code = ARM_HI; goto dominance;
22312 case CC_DLEUmode: code = ARM_LS; goto dominance;
22313 case CC_DLTUmode: code = ARM_CC;
22315 dominance:
22316 if (comp_code == EQ)
22317 return ARM_INVERSE_CONDITION_CODE (code);
22318 if (comp_code == NE)
22319 return code;
22320 return ARM_NV;
22322 case CC_NOOVmode:
22323 switch (comp_code)
22325 case NE: return ARM_NE;
22326 case EQ: return ARM_EQ;
22327 case GE: return ARM_PL;
22328 case LT: return ARM_MI;
22329 default: return ARM_NV;
22332 case CC_Zmode:
22333 switch (comp_code)
22335 case NE: return ARM_NE;
22336 case EQ: return ARM_EQ;
22337 default: return ARM_NV;
22340 case CC_Nmode:
22341 switch (comp_code)
22343 case NE: return ARM_MI;
22344 case EQ: return ARM_PL;
22345 default: return ARM_NV;
22348 case CCFPEmode:
22349 case CCFPmode:
22350 /* We can handle all cases except UNEQ and LTGT. */
22351 switch (comp_code)
22353 case GE: return ARM_GE;
22354 case GT: return ARM_GT;
22355 case LE: return ARM_LS;
22356 case LT: return ARM_MI;
22357 case NE: return ARM_NE;
22358 case EQ: return ARM_EQ;
22359 case ORDERED: return ARM_VC;
22360 case UNORDERED: return ARM_VS;
22361 case UNLT: return ARM_LT;
22362 case UNLE: return ARM_LE;
22363 case UNGT: return ARM_HI;
22364 case UNGE: return ARM_PL;
22365 /* UNEQ and LTGT do not have a representation. */
22366 case UNEQ: /* Fall through. */
22367 case LTGT: /* Fall through. */
22368 default: return ARM_NV;
22371 case CC_SWPmode:
22372 switch (comp_code)
22374 case NE: return ARM_NE;
22375 case EQ: return ARM_EQ;
22376 case GE: return ARM_LE;
22377 case GT: return ARM_LT;
22378 case LE: return ARM_GE;
22379 case LT: return ARM_GT;
22380 case GEU: return ARM_LS;
22381 case GTU: return ARM_CC;
22382 case LEU: return ARM_CS;
22383 case LTU: return ARM_HI;
22384 default: return ARM_NV;
22387 case CC_Cmode:
22388 switch (comp_code)
22390 case LTU: return ARM_CS;
22391 case GEU: return ARM_CC;
22392 default: return ARM_NV;
22395 case CC_CZmode:
22396 switch (comp_code)
22398 case NE: return ARM_NE;
22399 case EQ: return ARM_EQ;
22400 case GEU: return ARM_CS;
22401 case GTU: return ARM_HI;
22402 case LEU: return ARM_LS;
22403 case LTU: return ARM_CC;
22404 default: return ARM_NV;
22407 case CC_NCVmode:
22408 switch (comp_code)
22410 case GE: return ARM_GE;
22411 case LT: return ARM_LT;
22412 case GEU: return ARM_CS;
22413 case LTU: return ARM_CC;
22414 default: return ARM_NV;
22417 case CCmode:
22418 switch (comp_code)
22420 case NE: return ARM_NE;
22421 case EQ: return ARM_EQ;
22422 case GE: return ARM_GE;
22423 case GT: return ARM_GT;
22424 case LE: return ARM_LE;
22425 case LT: return ARM_LT;
22426 case GEU: return ARM_CS;
22427 case GTU: return ARM_HI;
22428 case LEU: return ARM_LS;
22429 case LTU: return ARM_CC;
22430 default: return ARM_NV;
22433 default: gcc_unreachable ();
22437 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22438 static enum arm_cond_code
22439 get_arm_condition_code (rtx comparison)
22441 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22442 gcc_assert (code != ARM_NV);
22443 return code;
22446 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22447 instructions. */
22448 void
22449 thumb2_final_prescan_insn (rtx insn)
22451 rtx first_insn = insn;
22452 rtx body = PATTERN (insn);
22453 rtx predicate;
22454 enum arm_cond_code code;
22455 int n;
22456 int mask;
22457 int max;
22459 /* max_insns_skipped in the tune was already taken into account in the
22460 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22461 just emit the IT blocks where we can. It does not make sense to split
22462 the IT blocks. */
22463 max = MAX_INSN_PER_IT_BLOCK;
22465 /* Remove the previous insn from the count of insns to be output. */
22466 if (arm_condexec_count)
22467 arm_condexec_count--;
22469 /* Nothing to do if we are already inside a conditional block. */
22470 if (arm_condexec_count)
22471 return;
22473 if (GET_CODE (body) != COND_EXEC)
22474 return;
22476 /* Conditional jumps are implemented directly. */
22477 if (JUMP_P (insn))
22478 return;
22480 predicate = COND_EXEC_TEST (body);
22481 arm_current_cc = get_arm_condition_code (predicate);
22483 n = get_attr_ce_count (insn);
22484 arm_condexec_count = 1;
22485 arm_condexec_mask = (1 << n) - 1;
22486 arm_condexec_masklen = n;
22487 /* See if subsequent instructions can be combined into the same block. */
22488 for (;;)
22490 insn = next_nonnote_insn (insn);
22492 /* Jumping into the middle of an IT block is illegal, so a label or
22493 barrier terminates the block. */
22494 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22495 break;
22497 body = PATTERN (insn);
22498 /* USE and CLOBBER aren't really insns, so just skip them. */
22499 if (GET_CODE (body) == USE
22500 || GET_CODE (body) == CLOBBER)
22501 continue;
22503 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22504 if (GET_CODE (body) != COND_EXEC)
22505 break;
22506 /* Maximum number of conditionally executed instructions in a block. */
22507 n = get_attr_ce_count (insn);
22508 if (arm_condexec_masklen + n > max)
22509 break;
22511 predicate = COND_EXEC_TEST (body);
22512 code = get_arm_condition_code (predicate);
22513 mask = (1 << n) - 1;
22514 if (arm_current_cc == code)
22515 arm_condexec_mask |= (mask << arm_condexec_masklen);
22516 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22517 break;
22519 arm_condexec_count++;
22520 arm_condexec_masklen += n;
22522 /* A jump must be the last instruction in a conditional block. */
22523 if (JUMP_P (insn))
22524 break;
22526 /* Restore recog_data (getting the attributes of other insns can
22527 destroy this array, but final.c assumes that it remains intact
22528 across this call). */
22529 extract_constrain_insn_cached (first_insn);
22532 void
22533 arm_final_prescan_insn (rtx insn)
22535 /* BODY will hold the body of INSN. */
22536 rtx body = PATTERN (insn);
22538 /* This will be 1 if trying to repeat the trick, and things need to be
22539 reversed if it appears to fail. */
22540 int reverse = 0;
22542 /* If we start with a return insn, we only succeed if we find another one. */
22543 int seeking_return = 0;
22544 enum rtx_code return_code = UNKNOWN;
22546 /* START_INSN will hold the insn from where we start looking. This is the
22547 first insn after the following code_label if REVERSE is true. */
22548 rtx start_insn = insn;
22550 /* If in state 4, check if the target branch is reached, in order to
22551 change back to state 0. */
22552 if (arm_ccfsm_state == 4)
22554 if (insn == arm_target_insn)
22556 arm_target_insn = NULL;
22557 arm_ccfsm_state = 0;
22559 return;
22562 /* If in state 3, it is possible to repeat the trick, if this insn is an
22563 unconditional branch to a label, and immediately following this branch
22564 is the previous target label which is only used once, and the label this
22565 branch jumps to is not too far off. */
22566 if (arm_ccfsm_state == 3)
22568 if (simplejump_p (insn))
22570 start_insn = next_nonnote_insn (start_insn);
22571 if (BARRIER_P (start_insn))
22573 /* XXX Isn't this always a barrier? */
22574 start_insn = next_nonnote_insn (start_insn);
22576 if (LABEL_P (start_insn)
22577 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22578 && LABEL_NUSES (start_insn) == 1)
22579 reverse = TRUE;
22580 else
22581 return;
22583 else if (ANY_RETURN_P (body))
22585 start_insn = next_nonnote_insn (start_insn);
22586 if (BARRIER_P (start_insn))
22587 start_insn = next_nonnote_insn (start_insn);
22588 if (LABEL_P (start_insn)
22589 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22590 && LABEL_NUSES (start_insn) == 1)
22592 reverse = TRUE;
22593 seeking_return = 1;
22594 return_code = GET_CODE (body);
22596 else
22597 return;
22599 else
22600 return;
22603 gcc_assert (!arm_ccfsm_state || reverse);
22604 if (!JUMP_P (insn))
22605 return;
22607 /* This jump might be paralleled with a clobber of the condition codes;
22608 the jump should always come first. */
22609 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22610 body = XVECEXP (body, 0, 0);
22612 if (reverse
22613 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22614 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22616 int insns_skipped;
22617 int fail = FALSE, succeed = FALSE;
22618 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22619 int then_not_else = TRUE;
22620 rtx this_insn = start_insn, label = 0;
22622 /* Register the insn jumped to. */
22623 if (reverse)
22625 if (!seeking_return)
22626 label = XEXP (SET_SRC (body), 0);
22628 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22629 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22630 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22632 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22633 then_not_else = FALSE;
22635 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22637 seeking_return = 1;
22638 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22640 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22642 seeking_return = 1;
22643 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22644 then_not_else = FALSE;
22646 else
22647 gcc_unreachable ();
22649 /* See how many insns this branch skips, and what kind of insns. If all
22650 insns are okay, and the label or unconditional branch to the same
22651 label is not too far away, succeed. */
22652 for (insns_skipped = 0;
22653 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22655 rtx scanbody;
22657 this_insn = next_nonnote_insn (this_insn);
22658 if (!this_insn)
22659 break;
22661 switch (GET_CODE (this_insn))
22663 case CODE_LABEL:
22664 /* Succeed if it is the target label, otherwise fail since
22665 control falls in from somewhere else. */
22666 if (this_insn == label)
22668 arm_ccfsm_state = 1;
22669 succeed = TRUE;
22671 else
22672 fail = TRUE;
22673 break;
22675 case BARRIER:
22676 /* Succeed if the following insn is the target label.
22677 Otherwise fail.
22678 If return insns are used then the last insn in a function
22679 will be a barrier. */
22680 this_insn = next_nonnote_insn (this_insn);
22681 if (this_insn && this_insn == label)
22683 arm_ccfsm_state = 1;
22684 succeed = TRUE;
22686 else
22687 fail = TRUE;
22688 break;
22690 case CALL_INSN:
22691 /* The AAPCS says that conditional calls should not be
22692 used since they make interworking inefficient (the
22693 linker can't transform BL<cond> into BLX). That's
22694 only a problem if the machine has BLX. */
22695 if (arm_arch5)
22697 fail = TRUE;
22698 break;
22701 /* Succeed if the following insn is the target label, or
22702 if the following two insns are a barrier and the
22703 target label. */
22704 this_insn = next_nonnote_insn (this_insn);
22705 if (this_insn && BARRIER_P (this_insn))
22706 this_insn = next_nonnote_insn (this_insn);
22708 if (this_insn && this_insn == label
22709 && insns_skipped < max_insns_skipped)
22711 arm_ccfsm_state = 1;
22712 succeed = TRUE;
22714 else
22715 fail = TRUE;
22716 break;
22718 case JUMP_INSN:
22719 /* If this is an unconditional branch to the same label, succeed.
22720 If it is to another label, do nothing. If it is conditional,
22721 fail. */
22722 /* XXX Probably, the tests for SET and the PC are
22723 unnecessary. */
22725 scanbody = PATTERN (this_insn);
22726 if (GET_CODE (scanbody) == SET
22727 && GET_CODE (SET_DEST (scanbody)) == PC)
22729 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22730 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22732 arm_ccfsm_state = 2;
22733 succeed = TRUE;
22735 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22736 fail = TRUE;
22738 /* Fail if a conditional return is undesirable (e.g. on a
22739 StrongARM), but still allow this if optimizing for size. */
22740 else if (GET_CODE (scanbody) == return_code
22741 && !use_return_insn (TRUE, NULL)
22742 && !optimize_size)
22743 fail = TRUE;
22744 else if (GET_CODE (scanbody) == return_code)
22746 arm_ccfsm_state = 2;
22747 succeed = TRUE;
22749 else if (GET_CODE (scanbody) == PARALLEL)
22751 switch (get_attr_conds (this_insn))
22753 case CONDS_NOCOND:
22754 break;
22755 default:
22756 fail = TRUE;
22757 break;
22760 else
22761 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22763 break;
22765 case INSN:
22766 /* Instructions using or affecting the condition codes make it
22767 fail. */
22768 scanbody = PATTERN (this_insn);
22769 if (!(GET_CODE (scanbody) == SET
22770 || GET_CODE (scanbody) == PARALLEL)
22771 || get_attr_conds (this_insn) != CONDS_NOCOND)
22772 fail = TRUE;
22773 break;
22775 default:
22776 break;
22779 if (succeed)
22781 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22782 arm_target_label = CODE_LABEL_NUMBER (label);
22783 else
22785 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22787 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22789 this_insn = next_nonnote_insn (this_insn);
22790 gcc_assert (!this_insn
22791 || (!BARRIER_P (this_insn)
22792 && !LABEL_P (this_insn)));
22794 if (!this_insn)
22796 /* Oh, dear! We ran off the end... give up. */
22797 extract_constrain_insn_cached (insn);
22798 arm_ccfsm_state = 0;
22799 arm_target_insn = NULL;
22800 return;
22802 arm_target_insn = this_insn;
22805 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22806 what it was. */
22807 if (!reverse)
22808 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22810 if (reverse || then_not_else)
22811 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22814 /* Restore recog_data (getting the attributes of other insns can
22815 destroy this array, but final.c assumes that it remains intact
22816 across this call). */
22817 extract_constrain_insn_cached (insn);
22821 /* Output IT instructions. */
22822 void
22823 thumb2_asm_output_opcode (FILE * stream)
22825 char buff[5];
22826 int n;
22828 if (arm_condexec_mask)
22830 for (n = 0; n < arm_condexec_masklen; n++)
22831 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22832 buff[n] = 0;
22833 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22834 arm_condition_codes[arm_current_cc]);
22835 arm_condexec_mask = 0;
22839 /* Returns true if REGNO is a valid register
22840 for holding a quantity of type MODE. */
22842 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22844 if (GET_MODE_CLASS (mode) == MODE_CC)
22845 return (regno == CC_REGNUM
22846 || (TARGET_HARD_FLOAT && TARGET_VFP
22847 && regno == VFPCC_REGNUM));
22849 if (TARGET_THUMB1)
22850 /* For the Thumb we only allow values bigger than SImode in
22851 registers 0 - 6, so that there is always a second low
22852 register available to hold the upper part of the value.
22853 We probably ought to ensure that the register is the
22854 start of an even numbered register pair. */
22855 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22857 if (TARGET_HARD_FLOAT && TARGET_VFP
22858 && IS_VFP_REGNUM (regno))
22860 if (mode == SFmode || mode == SImode)
22861 return VFP_REGNO_OK_FOR_SINGLE (regno);
22863 if (mode == DFmode)
22864 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22866 /* VFP registers can hold HFmode values, but there is no point in
22867 putting them there unless we have hardware conversion insns. */
22868 if (mode == HFmode)
22869 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22871 if (TARGET_NEON)
22872 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22873 || (VALID_NEON_QREG_MODE (mode)
22874 && NEON_REGNO_OK_FOR_QUAD (regno))
22875 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22876 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22877 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22878 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22879 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22881 return FALSE;
22884 if (TARGET_REALLY_IWMMXT)
22886 if (IS_IWMMXT_GR_REGNUM (regno))
22887 return mode == SImode;
22889 if (IS_IWMMXT_REGNUM (regno))
22890 return VALID_IWMMXT_REG_MODE (mode);
22893 /* We allow almost any value to be stored in the general registers.
22894 Restrict doubleword quantities to even register pairs in ARM state
22895 so that we can use ldrd. Do not allow very large Neon structure
22896 opaque modes in general registers; they would use too many. */
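/* Illustrative: with TARGET_LDRD in ARM state a DImode value may live in
   r0/r1 or r2/r3 but not in r1/r2, since ldrd/strd need the first register
   of the pair to be even.  */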
22897 if (regno <= LAST_ARM_REGNUM)
22899 if (ARM_NUM_REGS (mode) > 4)
22900 return FALSE;
22902 if (TARGET_THUMB2)
22903 return TRUE;
22905 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22908 if (regno == FRAME_POINTER_REGNUM
22909 || regno == ARG_POINTER_REGNUM)
22910 /* We only allow integers in the fake hard registers. */
22911 return GET_MODE_CLASS (mode) == MODE_INT;
22913 return FALSE;
22916 /* Implement MODES_TIEABLE_P. */
22918 bool
22919 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22921 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22922 return true;
22924 /* We specifically want to allow elements of "structure" modes to
22925 be tieable to the structure. This more general condition allows
22926 other rarer situations too. */
22927 if (TARGET_NEON
22928 && (VALID_NEON_DREG_MODE (mode1)
22929 || VALID_NEON_QREG_MODE (mode1)
22930 || VALID_NEON_STRUCT_MODE (mode1))
22931 && (VALID_NEON_DREG_MODE (mode2)
22932 || VALID_NEON_QREG_MODE (mode2)
22933 || VALID_NEON_STRUCT_MODE (mode2)))
22934 return true;
22936 return false;
22939 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22940 not used in arm mode. */
22942 enum reg_class
22943 arm_regno_class (int regno)
22945 if (TARGET_THUMB1)
22947 if (regno == STACK_POINTER_REGNUM)
22948 return STACK_REG;
22949 if (regno == CC_REGNUM)
22950 return CC_REG;
22951 if (regno < 8)
22952 return LO_REGS;
22953 return HI_REGS;
22956 if (TARGET_THUMB2 && regno < 8)
22957 return LO_REGS;
22959 if ( regno <= LAST_ARM_REGNUM
22960 || regno == FRAME_POINTER_REGNUM
22961 || regno == ARG_POINTER_REGNUM)
22962 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22964 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22965 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22967 if (IS_VFP_REGNUM (regno))
22969 if (regno <= D7_VFP_REGNUM)
22970 return VFP_D0_D7_REGS;
22971 else if (regno <= LAST_LO_VFP_REGNUM)
22972 return VFP_LO_REGS;
22973 else
22974 return VFP_HI_REGS;
22977 if (IS_IWMMXT_REGNUM (regno))
22978 return IWMMXT_REGS;
22980 if (IS_IWMMXT_GR_REGNUM (regno))
22981 return IWMMXT_GR_REGS;
22983 return NO_REGS;
22986 /* Handle a special case when computing the offset
22987 of an argument from the frame pointer. */
22989 arm_debugger_arg_offset (int value, rtx addr)
22991 rtx insn;
22993 /* We are only interested if dbxout_parms() failed to compute the offset. */
22994 if (value != 0)
22995 return 0;
22997 /* We can only cope with the case where the address is held in a register. */
22998 if (!REG_P (addr))
22999 return 0;
23001 /* If we are using the frame pointer to point at the argument, then
23002 an offset of 0 is correct. */
23003 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23004 return 0;
23006 /* If we are using the stack pointer to point at the
23007 argument, then an offset of 0 is correct. */
23008 /* ??? Check this is consistent with thumb2 frame layout. */
23009 if ((TARGET_THUMB || !frame_pointer_needed)
23010 && REGNO (addr) == SP_REGNUM)
23011 return 0;
23013 /* Oh dear. The argument is pointed to by a register rather
23014 than being held in a register, or being stored at a known
23015 offset from the frame pointer. Since GDB only understands
23016 those two kinds of argument we must translate the address
23017 held in the register into an offset from the frame pointer.
23018 We do this by searching through the insns for the function
23019 looking to see where this register gets its value. If the
23020 register is initialized from the frame pointer plus an offset
23021 then we are in luck and we can continue, otherwise we give up.
23023 This code is exercised by producing debugging information
23024 for a function with arguments like this:
23026 double func (double a, double b, int c, double d) {return d;}
23028 Without this code the stab for parameter 'd' will be set to
23029 an offset of 0 from the frame pointer, rather than 8. */
23031 /* The if() statement says:
23033 If the insn is a normal instruction
23034 and if the insn is setting the value in a register
23035 and if the register being set is the register holding the address of the argument
23036 and if the address is computed by an addition
23037 that involves adding to a register
23038 which is the frame pointer
23039 a constant integer
23041 then... */
23043 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23045 if ( NONJUMP_INSN_P (insn)
23046 && GET_CODE (PATTERN (insn)) == SET
23047 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23048 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23049 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23050 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23051 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23054 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23056 break;
23060 if (value == 0)
23062 debug_rtx (addr);
23063 warning (0, "unable to compute real location of stacked parameter");
23064 value = 8; /* XXX magic hack */
23067 return value;
23070 typedef enum {
23071 T_V8QI,
23072 T_V4HI,
23073 T_V4HF,
23074 T_V2SI,
23075 T_V2SF,
23076 T_DI,
23077 T_V16QI,
23078 T_V8HI,
23079 T_V4SI,
23080 T_V4SF,
23081 T_V2DI,
23082 T_TI,
23083 T_EI,
23084 T_OI,
23085 T_MAX /* Size of enum. Keep last. */
23086 } neon_builtin_type_mode;
23088 #define TYPE_MODE_BIT(X) (1 << (X))
23090 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23091 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23092 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23093 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23094 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23095 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23097 #define v8qi_UP T_V8QI
23098 #define v4hi_UP T_V4HI
23099 #define v4hf_UP T_V4HF
23100 #define v2si_UP T_V2SI
23101 #define v2sf_UP T_V2SF
23102 #define di_UP T_DI
23103 #define v16qi_UP T_V16QI
23104 #define v8hi_UP T_V8HI
23105 #define v4si_UP T_V4SI
23106 #define v4sf_UP T_V4SF
23107 #define v2di_UP T_V2DI
23108 #define ti_UP T_TI
23109 #define ei_UP T_EI
23110 #define oi_UP T_OI
23112 #define UP(X) X##_UP
23114 typedef enum {
23115 NEON_BINOP,
23116 NEON_TERNOP,
23117 NEON_UNOP,
23118 NEON_BSWAP,
23119 NEON_GETLANE,
23120 NEON_SETLANE,
23121 NEON_CREATE,
23122 NEON_RINT,
23123 NEON_DUP,
23124 NEON_DUPLANE,
23125 NEON_COMBINE,
23126 NEON_SPLIT,
23127 NEON_LANEMUL,
23128 NEON_LANEMULL,
23129 NEON_LANEMULH,
23130 NEON_LANEMAC,
23131 NEON_SCALARMUL,
23132 NEON_SCALARMULL,
23133 NEON_SCALARMULH,
23134 NEON_SCALARMAC,
23135 NEON_CONVERT,
23136 NEON_FLOAT_WIDEN,
23137 NEON_FLOAT_NARROW,
23138 NEON_FIXCONV,
23139 NEON_SELECT,
23140 NEON_REINTERP,
23141 NEON_VTBL,
23142 NEON_VTBX,
23143 NEON_LOAD1,
23144 NEON_LOAD1LANE,
23145 NEON_STORE1,
23146 NEON_STORE1LANE,
23147 NEON_LOADSTRUCT,
23148 NEON_LOADSTRUCTLANE,
23149 NEON_STORESTRUCT,
23150 NEON_STORESTRUCTLANE,
23151 NEON_LOGICBINOP,
23152 NEON_SHIFTINSERT,
23153 NEON_SHIFTIMM,
23154 NEON_SHIFTACC
23155 } neon_itype;
23157 typedef struct {
23158 const char *name;
23159 const neon_itype itype;
23160 const neon_builtin_type_mode mode;
23161 const enum insn_code code;
23162 unsigned int fcode;
23163 } neon_builtin_datum;
23165 #define CF(N,X) CODE_FOR_neon_##N##X
23167 #define VAR1(T, N, A) \
23168 {#N, NEON_##T, UP (A), CF (N, A), 0}
23169 #define VAR2(T, N, A, B) \
23170 VAR1 (T, N, A), \
23171 {#N, NEON_##T, UP (B), CF (N, B), 0}
23172 #define VAR3(T, N, A, B, C) \
23173 VAR2 (T, N, A, B), \
23174 {#N, NEON_##T, UP (C), CF (N, C), 0}
23175 #define VAR4(T, N, A, B, C, D) \
23176 VAR3 (T, N, A, B, C), \
23177 {#N, NEON_##T, UP (D), CF (N, D), 0}
23178 #define VAR5(T, N, A, B, C, D, E) \
23179 VAR4 (T, N, A, B, C, D), \
23180 {#N, NEON_##T, UP (E), CF (N, E), 0}
23181 #define VAR6(T, N, A, B, C, D, E, F) \
23182 VAR5 (T, N, A, B, C, D, E), \
23183 {#N, NEON_##T, UP (F), CF (N, F), 0}
23184 #define VAR7(T, N, A, B, C, D, E, F, G) \
23185 VAR6 (T, N, A, B, C, D, E, F), \
23186 {#N, NEON_##T, UP (G), CF (N, G), 0}
23187 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23188 VAR7 (T, N, A, B, C, D, E, F, G), \
23189 {#N, NEON_##T, UP (H), CF (N, H), 0}
23190 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23191 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23192 {#N, NEON_##T, UP (I), CF (N, I), 0}
23193 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23194 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23195 {#N, NEON_##T, UP (J), CF (N, J), 0}
23197 /* The NEON builtin data can be found in arm_neon_builtins.def.
23198 The mode entries in the following table correspond to the "key" type of the
23199 instruction variant, i.e. equivalent to that which would be specified after
23200 the assembler mnemonic, which usually refers to the last vector operand.
23201 (Signed/unsigned/polynomial types are not differentiated between though, and
23202 are all mapped onto the same mode for a given element size.) The modes
23203 listed per instruction should be the same as those defined for that
23204 instruction's pattern in neon.md. */
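/* Illustrative (hypothetical entry): a line such as
   VAR2 (BINOP, vadd, v8qi, v16qi)
   in arm_neon_builtins.def expands via the VARn/CF/UP macros above into
   {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0} and
   {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0};
   the fcode field, zero here, is filled in when the builtins are
   registered.  */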
23206 static neon_builtin_datum neon_builtin_data[] =
23208 #include "arm_neon_builtins.def"
23211 #undef CF
23212 #undef VAR1
23213 #undef VAR2
23214 #undef VAR3
23215 #undef VAR4
23216 #undef VAR5
23217 #undef VAR6
23218 #undef VAR7
23219 #undef VAR8
23220 #undef VAR9
23221 #undef VAR10
23223 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23224 #define VAR1(T, N, A) \
23225 CF (N, A)
23226 #define VAR2(T, N, A, B) \
23227 VAR1 (T, N, A), \
23228 CF (N, B)
23229 #define VAR3(T, N, A, B, C) \
23230 VAR2 (T, N, A, B), \
23231 CF (N, C)
23232 #define VAR4(T, N, A, B, C, D) \
23233 VAR3 (T, N, A, B, C), \
23234 CF (N, D)
23235 #define VAR5(T, N, A, B, C, D, E) \
23236 VAR4 (T, N, A, B, C, D), \
23237 CF (N, E)
23238 #define VAR6(T, N, A, B, C, D, E, F) \
23239 VAR5 (T, N, A, B, C, D, E), \
23240 CF (N, F)
23241 #define VAR7(T, N, A, B, C, D, E, F, G) \
23242 VAR6 (T, N, A, B, C, D, E, F), \
23243 CF (N, G)
23244 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23245 VAR7 (T, N, A, B, C, D, E, F, G), \
23246 CF (N, H)
23247 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23248 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23249 CF (N, I)
23250 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23251 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23252 CF (N, J)
23253 enum arm_builtins
23255 ARM_BUILTIN_GETWCGR0,
23256 ARM_BUILTIN_GETWCGR1,
23257 ARM_BUILTIN_GETWCGR2,
23258 ARM_BUILTIN_GETWCGR3,
23260 ARM_BUILTIN_SETWCGR0,
23261 ARM_BUILTIN_SETWCGR1,
23262 ARM_BUILTIN_SETWCGR2,
23263 ARM_BUILTIN_SETWCGR3,
23265 ARM_BUILTIN_WZERO,
23267 ARM_BUILTIN_WAVG2BR,
23268 ARM_BUILTIN_WAVG2HR,
23269 ARM_BUILTIN_WAVG2B,
23270 ARM_BUILTIN_WAVG2H,
23272 ARM_BUILTIN_WACCB,
23273 ARM_BUILTIN_WACCH,
23274 ARM_BUILTIN_WACCW,
23276 ARM_BUILTIN_WMACS,
23277 ARM_BUILTIN_WMACSZ,
23278 ARM_BUILTIN_WMACU,
23279 ARM_BUILTIN_WMACUZ,
23281 ARM_BUILTIN_WSADB,
23282 ARM_BUILTIN_WSADBZ,
23283 ARM_BUILTIN_WSADH,
23284 ARM_BUILTIN_WSADHZ,
23286 ARM_BUILTIN_WALIGNI,
23287 ARM_BUILTIN_WALIGNR0,
23288 ARM_BUILTIN_WALIGNR1,
23289 ARM_BUILTIN_WALIGNR2,
23290 ARM_BUILTIN_WALIGNR3,
23292 ARM_BUILTIN_TMIA,
23293 ARM_BUILTIN_TMIAPH,
23294 ARM_BUILTIN_TMIABB,
23295 ARM_BUILTIN_TMIABT,
23296 ARM_BUILTIN_TMIATB,
23297 ARM_BUILTIN_TMIATT,
23299 ARM_BUILTIN_TMOVMSKB,
23300 ARM_BUILTIN_TMOVMSKH,
23301 ARM_BUILTIN_TMOVMSKW,
23303 ARM_BUILTIN_TBCSTB,
23304 ARM_BUILTIN_TBCSTH,
23305 ARM_BUILTIN_TBCSTW,
23307 ARM_BUILTIN_WMADDS,
23308 ARM_BUILTIN_WMADDU,
23310 ARM_BUILTIN_WPACKHSS,
23311 ARM_BUILTIN_WPACKWSS,
23312 ARM_BUILTIN_WPACKDSS,
23313 ARM_BUILTIN_WPACKHUS,
23314 ARM_BUILTIN_WPACKWUS,
23315 ARM_BUILTIN_WPACKDUS,
23317 ARM_BUILTIN_WADDB,
23318 ARM_BUILTIN_WADDH,
23319 ARM_BUILTIN_WADDW,
23320 ARM_BUILTIN_WADDSSB,
23321 ARM_BUILTIN_WADDSSH,
23322 ARM_BUILTIN_WADDSSW,
23323 ARM_BUILTIN_WADDUSB,
23324 ARM_BUILTIN_WADDUSH,
23325 ARM_BUILTIN_WADDUSW,
23326 ARM_BUILTIN_WSUBB,
23327 ARM_BUILTIN_WSUBH,
23328 ARM_BUILTIN_WSUBW,
23329 ARM_BUILTIN_WSUBSSB,
23330 ARM_BUILTIN_WSUBSSH,
23331 ARM_BUILTIN_WSUBSSW,
23332 ARM_BUILTIN_WSUBUSB,
23333 ARM_BUILTIN_WSUBUSH,
23334 ARM_BUILTIN_WSUBUSW,
23336 ARM_BUILTIN_WAND,
23337 ARM_BUILTIN_WANDN,
23338 ARM_BUILTIN_WOR,
23339 ARM_BUILTIN_WXOR,
23341 ARM_BUILTIN_WCMPEQB,
23342 ARM_BUILTIN_WCMPEQH,
23343 ARM_BUILTIN_WCMPEQW,
23344 ARM_BUILTIN_WCMPGTUB,
23345 ARM_BUILTIN_WCMPGTUH,
23346 ARM_BUILTIN_WCMPGTUW,
23347 ARM_BUILTIN_WCMPGTSB,
23348 ARM_BUILTIN_WCMPGTSH,
23349 ARM_BUILTIN_WCMPGTSW,
23351 ARM_BUILTIN_TEXTRMSB,
23352 ARM_BUILTIN_TEXTRMSH,
23353 ARM_BUILTIN_TEXTRMSW,
23354 ARM_BUILTIN_TEXTRMUB,
23355 ARM_BUILTIN_TEXTRMUH,
23356 ARM_BUILTIN_TEXTRMUW,
23357 ARM_BUILTIN_TINSRB,
23358 ARM_BUILTIN_TINSRH,
23359 ARM_BUILTIN_TINSRW,
23361 ARM_BUILTIN_WMAXSW,
23362 ARM_BUILTIN_WMAXSH,
23363 ARM_BUILTIN_WMAXSB,
23364 ARM_BUILTIN_WMAXUW,
23365 ARM_BUILTIN_WMAXUH,
23366 ARM_BUILTIN_WMAXUB,
23367 ARM_BUILTIN_WMINSW,
23368 ARM_BUILTIN_WMINSH,
23369 ARM_BUILTIN_WMINSB,
23370 ARM_BUILTIN_WMINUW,
23371 ARM_BUILTIN_WMINUH,
23372 ARM_BUILTIN_WMINUB,
23374 ARM_BUILTIN_WMULUM,
23375 ARM_BUILTIN_WMULSM,
23376 ARM_BUILTIN_WMULUL,
23378 ARM_BUILTIN_PSADBH,
23379 ARM_BUILTIN_WSHUFH,
23381 ARM_BUILTIN_WSLLH,
23382 ARM_BUILTIN_WSLLW,
23383 ARM_BUILTIN_WSLLD,
23384 ARM_BUILTIN_WSRAH,
23385 ARM_BUILTIN_WSRAW,
23386 ARM_BUILTIN_WSRAD,
23387 ARM_BUILTIN_WSRLH,
23388 ARM_BUILTIN_WSRLW,
23389 ARM_BUILTIN_WSRLD,
23390 ARM_BUILTIN_WRORH,
23391 ARM_BUILTIN_WRORW,
23392 ARM_BUILTIN_WRORD,
23393 ARM_BUILTIN_WSLLHI,
23394 ARM_BUILTIN_WSLLWI,
23395 ARM_BUILTIN_WSLLDI,
23396 ARM_BUILTIN_WSRAHI,
23397 ARM_BUILTIN_WSRAWI,
23398 ARM_BUILTIN_WSRADI,
23399 ARM_BUILTIN_WSRLHI,
23400 ARM_BUILTIN_WSRLWI,
23401 ARM_BUILTIN_WSRLDI,
23402 ARM_BUILTIN_WRORHI,
23403 ARM_BUILTIN_WRORWI,
23404 ARM_BUILTIN_WRORDI,
23406 ARM_BUILTIN_WUNPCKIHB,
23407 ARM_BUILTIN_WUNPCKIHH,
23408 ARM_BUILTIN_WUNPCKIHW,
23409 ARM_BUILTIN_WUNPCKILB,
23410 ARM_BUILTIN_WUNPCKILH,
23411 ARM_BUILTIN_WUNPCKILW,
23413 ARM_BUILTIN_WUNPCKEHSB,
23414 ARM_BUILTIN_WUNPCKEHSH,
23415 ARM_BUILTIN_WUNPCKEHSW,
23416 ARM_BUILTIN_WUNPCKEHUB,
23417 ARM_BUILTIN_WUNPCKEHUH,
23418 ARM_BUILTIN_WUNPCKEHUW,
23419 ARM_BUILTIN_WUNPCKELSB,
23420 ARM_BUILTIN_WUNPCKELSH,
23421 ARM_BUILTIN_WUNPCKELSW,
23422 ARM_BUILTIN_WUNPCKELUB,
23423 ARM_BUILTIN_WUNPCKELUH,
23424 ARM_BUILTIN_WUNPCKELUW,
23426 ARM_BUILTIN_WABSB,
23427 ARM_BUILTIN_WABSH,
23428 ARM_BUILTIN_WABSW,
23430 ARM_BUILTIN_WADDSUBHX,
23431 ARM_BUILTIN_WSUBADDHX,
23433 ARM_BUILTIN_WABSDIFFB,
23434 ARM_BUILTIN_WABSDIFFH,
23435 ARM_BUILTIN_WABSDIFFW,
23437 ARM_BUILTIN_WADDCH,
23438 ARM_BUILTIN_WADDCW,
23440 ARM_BUILTIN_WAVG4,
23441 ARM_BUILTIN_WAVG4R,
23443 ARM_BUILTIN_WMADDSX,
23444 ARM_BUILTIN_WMADDUX,
23446 ARM_BUILTIN_WMADDSN,
23447 ARM_BUILTIN_WMADDUN,
23449 ARM_BUILTIN_WMULWSM,
23450 ARM_BUILTIN_WMULWUM,
23452 ARM_BUILTIN_WMULWSMR,
23453 ARM_BUILTIN_WMULWUMR,
23455 ARM_BUILTIN_WMULWL,
23457 ARM_BUILTIN_WMULSMR,
23458 ARM_BUILTIN_WMULUMR,
23460 ARM_BUILTIN_WQMULM,
23461 ARM_BUILTIN_WQMULMR,
23463 ARM_BUILTIN_WQMULWM,
23464 ARM_BUILTIN_WQMULWMR,
23466 ARM_BUILTIN_WADDBHUSM,
23467 ARM_BUILTIN_WADDBHUSL,
23469 ARM_BUILTIN_WQMIABB,
23470 ARM_BUILTIN_WQMIABT,
23471 ARM_BUILTIN_WQMIATB,
23472 ARM_BUILTIN_WQMIATT,
23474 ARM_BUILTIN_WQMIABBN,
23475 ARM_BUILTIN_WQMIABTN,
23476 ARM_BUILTIN_WQMIATBN,
23477 ARM_BUILTIN_WQMIATTN,
23479 ARM_BUILTIN_WMIABB,
23480 ARM_BUILTIN_WMIABT,
23481 ARM_BUILTIN_WMIATB,
23482 ARM_BUILTIN_WMIATT,
23484 ARM_BUILTIN_WMIABBN,
23485 ARM_BUILTIN_WMIABTN,
23486 ARM_BUILTIN_WMIATBN,
23487 ARM_BUILTIN_WMIATTN,
23489 ARM_BUILTIN_WMIAWBB,
23490 ARM_BUILTIN_WMIAWBT,
23491 ARM_BUILTIN_WMIAWTB,
23492 ARM_BUILTIN_WMIAWTT,
23494 ARM_BUILTIN_WMIAWBBN,
23495 ARM_BUILTIN_WMIAWBTN,
23496 ARM_BUILTIN_WMIAWTBN,
23497 ARM_BUILTIN_WMIAWTTN,
23499 ARM_BUILTIN_WMERGE,
23501 ARM_BUILTIN_CRC32B,
23502 ARM_BUILTIN_CRC32H,
23503 ARM_BUILTIN_CRC32W,
23504 ARM_BUILTIN_CRC32CB,
23505 ARM_BUILTIN_CRC32CH,
23506 ARM_BUILTIN_CRC32CW,
23508 ARM_BUILTIN_GET_FPSCR,
23509 ARM_BUILTIN_SET_FPSCR,
23511 #undef CRYPTO1
23512 #undef CRYPTO2
23513 #undef CRYPTO3
23515 #define CRYPTO1(L, U, M1, M2) \
23516 ARM_BUILTIN_CRYPTO_##U,
23517 #define CRYPTO2(L, U, M1, M2, M3) \
23518 ARM_BUILTIN_CRYPTO_##U,
23519 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23520 ARM_BUILTIN_CRYPTO_##U,
23522 #include "crypto.def"
23524 #undef CRYPTO1
23525 #undef CRYPTO2
23526 #undef CRYPTO3
23528 #include "arm_neon_builtins.def"
23530 ,ARM_BUILTIN_MAX
23533 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23535 #undef CF
23536 #undef VAR1
23537 #undef VAR2
23538 #undef VAR3
23539 #undef VAR4
23540 #undef VAR5
23541 #undef VAR6
23542 #undef VAR7
23543 #undef VAR8
23544 #undef VAR9
23545 #undef VAR10
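/* The block above uses the "X macro" pattern: CRYPTO1/2/3 and the VARn/CF
   macros are (re)defined so that including crypto.def and
   arm_neon_builtins.def expands each entry of those files into one
   enumerator, ending with ARM_BUILTIN_MAX.  As a hedged illustration (the
   contents of crypto.def are not reproduced here), an entry of the form

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   would expand, under the definitions above, to just

     ARM_BUILTIN_CRYPTO_AESD,

   The same .def files are included again further down, with different
   expansions of the same macros, to register the corresponding
   functions.  */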
23547 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23549 #define NUM_DREG_TYPES 5
23550 #define NUM_QREG_TYPES 6
23552 static void
23553 arm_init_neon_builtins (void)
23555 unsigned int i, fcode;
23556 tree decl;
23558 tree neon_intQI_type_node;
23559 tree neon_intHI_type_node;
23560 tree neon_floatHF_type_node;
23561 tree neon_polyQI_type_node;
23562 tree neon_polyHI_type_node;
23563 tree neon_intSI_type_node;
23564 tree neon_intDI_type_node;
23565 tree neon_intUTI_type_node;
23566 tree neon_float_type_node;
23568 tree intQI_pointer_node;
23569 tree intHI_pointer_node;
23570 tree intSI_pointer_node;
23571 tree intDI_pointer_node;
23572 tree float_pointer_node;
23574 tree const_intQI_node;
23575 tree const_intHI_node;
23576 tree const_intSI_node;
23577 tree const_intDI_node;
23578 tree const_float_node;
23580 tree const_intQI_pointer_node;
23581 tree const_intHI_pointer_node;
23582 tree const_intSI_pointer_node;
23583 tree const_intDI_pointer_node;
23584 tree const_float_pointer_node;
23586 tree V8QI_type_node;
23587 tree V4HI_type_node;
23588 tree V4UHI_type_node;
23589 tree V4HF_type_node;
23590 tree V2SI_type_node;
23591 tree V2USI_type_node;
23592 tree V2SF_type_node;
23593 tree V16QI_type_node;
23594 tree V8HI_type_node;
23595 tree V8UHI_type_node;
23596 tree V4SI_type_node;
23597 tree V4USI_type_node;
23598 tree V4SF_type_node;
23599 tree V2DI_type_node;
23600 tree V2UDI_type_node;
23602 tree intUQI_type_node;
23603 tree intUHI_type_node;
23604 tree intUSI_type_node;
23605 tree intUDI_type_node;
23607 tree intEI_type_node;
23608 tree intOI_type_node;
23609 tree intCI_type_node;
23610 tree intXI_type_node;
23612 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23613 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23614 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23616 /* Create distinguished type nodes for NEON vector element types,
23617 and pointers to values of such types, so we can detect them later. */
23618 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23619 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23620 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23621 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23622 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23623 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23624 neon_float_type_node = make_node (REAL_TYPE);
23625 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23626 layout_type (neon_float_type_node);
23627 neon_floatHF_type_node = make_node (REAL_TYPE);
23628 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23629 layout_type (neon_floatHF_type_node);
23631 /* Define typedefs which exactly correspond to the modes we are basing vector
23632 types on. If you change these names you'll need to change
23633 the table used by arm_mangle_type too. */
23634 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23635 "__builtin_neon_qi");
23636 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23637 "__builtin_neon_hi");
23638 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23639 "__builtin_neon_hf");
23640 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23641 "__builtin_neon_si");
23642 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23643 "__builtin_neon_sf");
23644 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23645 "__builtin_neon_di");
23646 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23647 "__builtin_neon_poly8");
23648 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23649 "__builtin_neon_poly16");
23651 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23652 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23653 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23654 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23655 float_pointer_node = build_pointer_type (neon_float_type_node);
23657 /* Next create constant-qualified versions of the above types. */
23658 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23659 TYPE_QUAL_CONST);
23660 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23661 TYPE_QUAL_CONST);
23662 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23663 TYPE_QUAL_CONST);
23664 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23665 TYPE_QUAL_CONST);
23666 const_float_node = build_qualified_type (neon_float_type_node,
23667 TYPE_QUAL_CONST);
23669 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23670 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23671 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23672 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23673 const_float_pointer_node = build_pointer_type (const_float_node);
23675 /* Unsigned integer types for various mode sizes. */
23676 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23677 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23678 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23679 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23680 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23681 /* Now create vector types based on our NEON element types. */
23682 /* 64-bit vectors. */
23683 V8QI_type_node =
23684 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23685 V4HI_type_node =
23686 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23687 V4UHI_type_node =
23688 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23689 V4HF_type_node =
23690 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23691 V2SI_type_node =
23692 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23693 V2USI_type_node =
23694 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23695 V2SF_type_node =
23696 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23697 /* 128-bit vectors. */
23698 V16QI_type_node =
23699 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23700 V8HI_type_node =
23701 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23702 V8UHI_type_node =
23703 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23704 V4SI_type_node =
23705 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23706 V4USI_type_node =
23707 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23708 V4SF_type_node =
23709 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23710 V2DI_type_node =
23711 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23712 V2UDI_type_node =
23713 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23716 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23717 "__builtin_neon_uqi");
23718 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23719 "__builtin_neon_uhi");
23720 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23721 "__builtin_neon_usi");
23722 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23723 "__builtin_neon_udi");
23724 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23725 "__builtin_neon_poly64");
23726 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23727 "__builtin_neon_poly128");
23729 /* Opaque integer types for structures of vectors. */
23730 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23731 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23732 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23733 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23735 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23736 "__builtin_neon_ti");
23737 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23738 "__builtin_neon_ei");
23739 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23740 "__builtin_neon_oi");
23741 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23742 "__builtin_neon_ci");
23743 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23744 "__builtin_neon_xi");
23746 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23749 tree V16UQI_type_node =
23750 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23752 tree v16uqi_ftype_v16uqi
23753 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23755 tree v16uqi_ftype_v16uqi_v16uqi
23756 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23757 V16UQI_type_node, NULL_TREE);
23759 tree v4usi_ftype_v4usi
23760 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23762 tree v4usi_ftype_v4usi_v4usi
23763 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23764 V4USI_type_node, NULL_TREE);
23766 tree v4usi_ftype_v4usi_v4usi_v4usi
23767 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23768 V4USI_type_node, V4USI_type_node, NULL_TREE);
23770 tree uti_ftype_udi_udi
23771 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23772 intUDI_type_node, NULL_TREE);
23774 #undef CRYPTO1
23775 #undef CRYPTO2
23776 #undef CRYPTO3
23777 #undef C
23778 #undef N
23779 #undef CF
23780 #undef FT1
23781 #undef FT2
23782 #undef FT3
23784 #define C(U) \
23785 ARM_BUILTIN_CRYPTO_##U
23786 #define N(L) \
23787 "__builtin_arm_crypto_"#L
23788 #define FT1(R, A) \
23789 R##_ftype_##A
23790 #define FT2(R, A1, A2) \
23791 R##_ftype_##A1##_##A2
23792 #define FT3(R, A1, A2, A3) \
23793 R##_ftype_##A1##_##A2##_##A3
23794 #define CRYPTO1(L, U, R, A) \
23795 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23796 C (U), BUILT_IN_MD, \
23797 NULL, NULL_TREE);
23798 #define CRYPTO2(L, U, R, A1, A2) \
23799 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23800 C (U), BUILT_IN_MD, \
23801 NULL, NULL_TREE);
23803 #define CRYPTO3(L, U, R, A1, A2, A3) \
23804 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23805 C (U), BUILT_IN_MD, \
23806 NULL, NULL_TREE);
23807 #include "crypto.def"
23809 #undef CRYPTO1
23810 #undef CRYPTO2
23811 #undef CRYPTO3
23812 #undef C
23813 #undef N
23814 #undef FT1
23815 #undef FT2
23816 #undef FT3
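/* A hedged sketch of what the crypto.def inclusion just above expands to.
   Assuming crypto.def carries an entry along the lines of

     CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi)

   the CRYPTO2 definition above would turn it into (roughly)

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESE]
       = add_builtin_function ("__builtin_arm_crypto_aese",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESE, BUILT_IN_MD,
                               NULL, NULL_TREE);

   so each entry both records the decl in arm_builtin_decls and exposes a
   __builtin_arm_crypto_* function to the front ends.  */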
23818 dreg_types[0] = V8QI_type_node;
23819 dreg_types[1] = V4HI_type_node;
23820 dreg_types[2] = V2SI_type_node;
23821 dreg_types[3] = V2SF_type_node;
23822 dreg_types[4] = neon_intDI_type_node;
23824 qreg_types[0] = V16QI_type_node;
23825 qreg_types[1] = V8HI_type_node;
23826 qreg_types[2] = V4SI_type_node;
23827 qreg_types[3] = V4SF_type_node;
23828 qreg_types[4] = V2DI_type_node;
23829 qreg_types[5] = neon_intUTI_type_node;
23831 for (i = 0; i < NUM_QREG_TYPES; i++)
23833 int j;
23834 for (j = 0; j < NUM_QREG_TYPES; j++)
23836 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23837 reinterp_ftype_dreg[i][j]
23838 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23840 reinterp_ftype_qreg[i][j]
23841 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23845 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23846 i < ARRAY_SIZE (neon_builtin_data);
23847 i++, fcode++)
23849 neon_builtin_datum *d = &neon_builtin_data[i];
23851 const char* const modenames[] = {
23852 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23853 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23854 "ti", "ei", "oi"
23856 char namebuf[60];
23857 tree ftype = NULL;
23858 int is_load = 0, is_store = 0;
23860 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23862 d->fcode = fcode;
23864 switch (d->itype)
23866 case NEON_LOAD1:
23867 case NEON_LOAD1LANE:
23868 case NEON_LOADSTRUCT:
23869 case NEON_LOADSTRUCTLANE:
23870 is_load = 1;
23871 /* Fall through. */
23872 case NEON_STORE1:
23873 case NEON_STORE1LANE:
23874 case NEON_STORESTRUCT:
23875 case NEON_STORESTRUCTLANE:
23876 if (!is_load)
23877 is_store = 1;
23878 /* Fall through. */
23879 case NEON_UNOP:
23880 case NEON_RINT:
23881 case NEON_BINOP:
23882 case NEON_LOGICBINOP:
23883 case NEON_SHIFTINSERT:
23884 case NEON_TERNOP:
23885 case NEON_GETLANE:
23886 case NEON_SETLANE:
23887 case NEON_CREATE:
23888 case NEON_DUP:
23889 case NEON_DUPLANE:
23890 case NEON_SHIFTIMM:
23891 case NEON_SHIFTACC:
23892 case NEON_COMBINE:
23893 case NEON_SPLIT:
23894 case NEON_CONVERT:
23895 case NEON_FIXCONV:
23896 case NEON_LANEMUL:
23897 case NEON_LANEMULL:
23898 case NEON_LANEMULH:
23899 case NEON_LANEMAC:
23900 case NEON_SCALARMUL:
23901 case NEON_SCALARMULL:
23902 case NEON_SCALARMULH:
23903 case NEON_SCALARMAC:
23904 case NEON_SELECT:
23905 case NEON_VTBL:
23906 case NEON_VTBX:
23908 int k;
23909 tree return_type = void_type_node, args = void_list_node;
23911 /* Build a function type directly from the insn_data for
23912 this builtin. The build_function_type() function takes
23913 care of removing duplicates for us. */
23914 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23916 tree eltype;
23918 if (is_load && k == 1)
23920 /* Neon load patterns always have the memory
23921 operand in the operand 1 position. */
23922 gcc_assert (insn_data[d->code].operand[k].predicate
23923 == neon_struct_operand);
23925 switch (d->mode)
23927 case T_V8QI:
23928 case T_V16QI:
23929 eltype = const_intQI_pointer_node;
23930 break;
23932 case T_V4HI:
23933 case T_V8HI:
23934 eltype = const_intHI_pointer_node;
23935 break;
23937 case T_V2SI:
23938 case T_V4SI:
23939 eltype = const_intSI_pointer_node;
23940 break;
23942 case T_V2SF:
23943 case T_V4SF:
23944 eltype = const_float_pointer_node;
23945 break;
23947 case T_DI:
23948 case T_V2DI:
23949 eltype = const_intDI_pointer_node;
23950 break;
23952 default: gcc_unreachable ();
23955 else if (is_store && k == 0)
23957 /* Similarly, Neon store patterns use operand 0 as
23958 the memory location to store to. */
23959 gcc_assert (insn_data[d->code].operand[k].predicate
23960 == neon_struct_operand);
23962 switch (d->mode)
23964 case T_V8QI:
23965 case T_V16QI:
23966 eltype = intQI_pointer_node;
23967 break;
23969 case T_V4HI:
23970 case T_V8HI:
23971 eltype = intHI_pointer_node;
23972 break;
23974 case T_V2SI:
23975 case T_V4SI:
23976 eltype = intSI_pointer_node;
23977 break;
23979 case T_V2SF:
23980 case T_V4SF:
23981 eltype = float_pointer_node;
23982 break;
23984 case T_DI:
23985 case T_V2DI:
23986 eltype = intDI_pointer_node;
23987 break;
23989 default: gcc_unreachable ();
23992 else
23994 switch (insn_data[d->code].operand[k].mode)
23996 case VOIDmode: eltype = void_type_node; break;
23997 /* Scalars. */
23998 case QImode: eltype = neon_intQI_type_node; break;
23999 case HImode: eltype = neon_intHI_type_node; break;
24000 case SImode: eltype = neon_intSI_type_node; break;
24001 case SFmode: eltype = neon_float_type_node; break;
24002 case DImode: eltype = neon_intDI_type_node; break;
24003 case TImode: eltype = intTI_type_node; break;
24004 case EImode: eltype = intEI_type_node; break;
24005 case OImode: eltype = intOI_type_node; break;
24006 case CImode: eltype = intCI_type_node; break;
24007 case XImode: eltype = intXI_type_node; break;
24008 /* 64-bit vectors. */
24009 case V8QImode: eltype = V8QI_type_node; break;
24010 case V4HImode: eltype = V4HI_type_node; break;
24011 case V2SImode: eltype = V2SI_type_node; break;
24012 case V2SFmode: eltype = V2SF_type_node; break;
24013 /* 128-bit vectors. */
24014 case V16QImode: eltype = V16QI_type_node; break;
24015 case V8HImode: eltype = V8HI_type_node; break;
24016 case V4SImode: eltype = V4SI_type_node; break;
24017 case V4SFmode: eltype = V4SF_type_node; break;
24018 case V2DImode: eltype = V2DI_type_node; break;
24019 default: gcc_unreachable ();
24023 if (k == 0 && !is_store)
24024 return_type = eltype;
24025 else
24026 args = tree_cons (NULL_TREE, eltype, args);
24029 ftype = build_function_type (return_type, args);
24031 break;
24033 case NEON_REINTERP:
24035 /* We iterate over NUM_DREG_TYPES doubleword types,
24036 then NUM_QREG_TYPES quadword types.
24037 V4HF is not a type used in reinterpret, so we translate
24038 d->mode to the correct index in reinterp_ftype_dreg. */
24039 bool qreg_p
24040 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24041 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24042 % NUM_QREG_TYPES;
24043 switch (insn_data[d->code].operand[0].mode)
24045 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24046 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24047 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24048 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24049 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24050 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24051 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24052 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24053 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24054 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24055 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24056 default: gcc_unreachable ();
24059 break;
24060 case NEON_FLOAT_WIDEN:
24062 tree eltype = NULL_TREE;
24063 tree return_type = NULL_TREE;
24065 switch (insn_data[d->code].operand[1].mode)
24067 case V4HFmode:
24068 eltype = V4HF_type_node;
24069 return_type = V4SF_type_node;
24070 break;
24071 default: gcc_unreachable ();
24073 ftype = build_function_type_list (return_type, eltype, NULL);
24074 break;
24076 case NEON_FLOAT_NARROW:
24078 tree eltype = NULL_TREE;
24079 tree return_type = NULL_TREE;
24081 switch (insn_data[d->code].operand[1].mode)
24083 case V4SFmode:
24084 eltype = V4SF_type_node;
24085 return_type = V4HF_type_node;
24086 break;
24087 default: gcc_unreachable ();
24089 ftype = build_function_type_list (return_type, eltype, NULL);
24090 break;
24092 case NEON_BSWAP:
24094 tree eltype = NULL_TREE;
24095 switch (insn_data[d->code].operand[1].mode)
24097 case V4HImode:
24098 eltype = V4UHI_type_node;
24099 break;
24100 case V8HImode:
24101 eltype = V8UHI_type_node;
24102 break;
24103 case V2SImode:
24104 eltype = V2USI_type_node;
24105 break;
24106 case V4SImode:
24107 eltype = V4USI_type_node;
24108 break;
24109 case V2DImode:
24110 eltype = V2UDI_type_node;
24111 break;
24112 default: gcc_unreachable ();
24114 ftype = build_function_type_list (eltype, eltype, NULL);
24115 break;
24117 default:
24118 gcc_unreachable ();
24121 gcc_assert (ftype != NULL);
24123 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24125 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24126 NULL_TREE);
24127 arm_builtin_decls[fcode] = decl;
24131 #undef NUM_DREG_TYPES
24132 #undef NUM_QREG_TYPES
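/* To summarize the loop above: every entry of neon_builtin_data is given a
   function type derived from its insn_data, and is then registered under
   the name "__builtin_neon_<name><mode>".  As a hedged example (the entry
   names live in arm_neon_builtins.def, not here), an entry named "vadd"
   with mode T_V8QI would be registered as __builtin_neon_vaddv8qi; the
   arm_neon.h intrinsics wrap these names.  */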
24134 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24135 do \
24137 if ((MASK) & insn_flags) \
24139 tree bdecl; \
24140 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24141 BUILT_IN_MD, NULL, NULL_TREE); \
24142 arm_builtin_decls[CODE] = bdecl; \
24145 while (0)
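/* def_mbuiltin only creates the builtin when the requested feature bit is
   present in insn_flags, so e.g. FL_IWMMXT2-only builtins do not appear on
   a plain iWMMXt target.  A minimal sketch of a direct use (the
   iwmmx_mbuiltin/iwmmx2_mbuiltin wrappers further down are the form
   actually used):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
 */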
24147 struct builtin_description
24149 const unsigned int mask;
24150 const enum insn_code icode;
24151 const char * const name;
24152 const enum arm_builtins code;
24153 const enum rtx_code comparison;
24154 const unsigned int flag;
24157 static const struct builtin_description bdesc_2arg[] =
24159 #define IWMMXT_BUILTIN(code, string, builtin) \
24160 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24161 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24163 #define IWMMXT2_BUILTIN(code, string, builtin) \
24164 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24165 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24167 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24168 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24169 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24170 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24171 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24172 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24173 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24174 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24175 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24176 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24177 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24178 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24179 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24180 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24181 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24182 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24183 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24184 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24185 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24186 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24187 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24188 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24189 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24190 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24191 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24192 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24193 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24194 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24195 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24196 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24197 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24198 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24199 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24200 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24201 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24202 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24203 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24204 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24205 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24206 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24207 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24208 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24209 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24210 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24211 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24212 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24213 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24214 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24215 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24216 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24217 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24218 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24219 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24220 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24221 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24222 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24223 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24224 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24225 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24226 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24227 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24228 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24229 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24230 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24231 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24232 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24233 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24234 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24235 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24236 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24237 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24238 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24239 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24240 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24241 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24242 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24243 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24244 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24246 #define IWMMXT_BUILTIN2(code, builtin) \
24247 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24249 #define IWMMXT2_BUILTIN2(code, builtin) \
24250 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24252 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24253 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24254 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24255 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24256 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24257 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24258 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24259 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24260 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24261 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24264 #define FP_BUILTIN(L, U) \
24265 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24266 UNKNOWN, 0},
24268 FP_BUILTIN (get_fpscr, GET_FPSCR)
24269 FP_BUILTIN (set_fpscr, SET_FPSCR)
24270 #undef FP_BUILTIN
24272 #define CRC32_BUILTIN(L, U) \
24273 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24274 UNKNOWN, 0},
24275 CRC32_BUILTIN (crc32b, CRC32B)
24276 CRC32_BUILTIN (crc32h, CRC32H)
24277 CRC32_BUILTIN (crc32w, CRC32W)
24278 CRC32_BUILTIN (crc32cb, CRC32CB)
24279 CRC32_BUILTIN (crc32ch, CRC32CH)
24280 CRC32_BUILTIN (crc32cw, CRC32CW)
24281 #undef CRC32_BUILTIN
24284 #define CRYPTO_BUILTIN(L, U) \
24285 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24286 UNKNOWN, 0},
24287 #undef CRYPTO1
24288 #undef CRYPTO2
24289 #undef CRYPTO3
24290 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24291 #define CRYPTO1(L, U, R, A)
24292 #define CRYPTO3(L, U, R, A1, A2, A3)
24293 #include "crypto.def"
24294 #undef CRYPTO1
24295 #undef CRYPTO2
24296 #undef CRYPTO3
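/* For reference, the IWMMXT_BUILTIN macro above turns a row such as

     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)

   into the initializer

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   arm_init_iwmmxt_builtins walks this table, derives the function type
   from the mode of operand 1 of the named insn, and registers each entry
   with def_mbuiltin.  */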
24300 static const struct builtin_description bdesc_1arg[] =
24302 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24303 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24304 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24305 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24306 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24307 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24308 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24309 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24310 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24311 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24312 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24313 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24314 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24315 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24316 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24317 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24318 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24319 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24320 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24321 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24322 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24323 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24324 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24325 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24327 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24328 #define CRYPTO2(L, U, R, A1, A2)
24329 #define CRYPTO3(L, U, R, A1, A2, A3)
24330 #include "crypto.def"
24331 #undef CRYPTO1
24332 #undef CRYPTO2
24333 #undef CRYPTO3
24336 static const struct builtin_description bdesc_3arg[] =
24338 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24339 #define CRYPTO1(L, U, R, A)
24340 #define CRYPTO2(L, U, R, A1, A2)
24341 #include "crypto.def"
24342 #undef CRYPTO1
24343 #undef CRYPTO2
24344 #undef CRYPTO3
24346 #undef CRYPTO_BUILTIN
24348 /* Set up all the iWMMXt builtins. This is not called if
24349 TARGET_IWMMXT is zero. */
24351 static void
24352 arm_init_iwmmxt_builtins (void)
24354 const struct builtin_description * d;
24355 size_t i;
24357 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24358 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24359 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24361 tree v8qi_ftype_v8qi_v8qi_int
24362 = build_function_type_list (V8QI_type_node,
24363 V8QI_type_node, V8QI_type_node,
24364 integer_type_node, NULL_TREE);
24365 tree v4hi_ftype_v4hi_int
24366 = build_function_type_list (V4HI_type_node,
24367 V4HI_type_node, integer_type_node, NULL_TREE);
24368 tree v2si_ftype_v2si_int
24369 = build_function_type_list (V2SI_type_node,
24370 V2SI_type_node, integer_type_node, NULL_TREE);
24371 tree v2si_ftype_di_di
24372 = build_function_type_list (V2SI_type_node,
24373 long_long_integer_type_node,
24374 long_long_integer_type_node,
24375 NULL_TREE);
24376 tree di_ftype_di_int
24377 = build_function_type_list (long_long_integer_type_node,
24378 long_long_integer_type_node,
24379 integer_type_node, NULL_TREE);
24380 tree di_ftype_di_int_int
24381 = build_function_type_list (long_long_integer_type_node,
24382 long_long_integer_type_node,
24383 integer_type_node,
24384 integer_type_node, NULL_TREE);
24385 tree int_ftype_v8qi
24386 = build_function_type_list (integer_type_node,
24387 V8QI_type_node, NULL_TREE);
24388 tree int_ftype_v4hi
24389 = build_function_type_list (integer_type_node,
24390 V4HI_type_node, NULL_TREE);
24391 tree int_ftype_v2si
24392 = build_function_type_list (integer_type_node,
24393 V2SI_type_node, NULL_TREE);
24394 tree int_ftype_v8qi_int
24395 = build_function_type_list (integer_type_node,
24396 V8QI_type_node, integer_type_node, NULL_TREE);
24397 tree int_ftype_v4hi_int
24398 = build_function_type_list (integer_type_node,
24399 V4HI_type_node, integer_type_node, NULL_TREE);
24400 tree int_ftype_v2si_int
24401 = build_function_type_list (integer_type_node,
24402 V2SI_type_node, integer_type_node, NULL_TREE);
24403 tree v8qi_ftype_v8qi_int_int
24404 = build_function_type_list (V8QI_type_node,
24405 V8QI_type_node, integer_type_node,
24406 integer_type_node, NULL_TREE);
24407 tree v4hi_ftype_v4hi_int_int
24408 = build_function_type_list (V4HI_type_node,
24409 V4HI_type_node, integer_type_node,
24410 integer_type_node, NULL_TREE);
24411 tree v2si_ftype_v2si_int_int
24412 = build_function_type_list (V2SI_type_node,
24413 V2SI_type_node, integer_type_node,
24414 integer_type_node, NULL_TREE);
24415 /* Miscellaneous. */
24416 tree v8qi_ftype_v4hi_v4hi
24417 = build_function_type_list (V8QI_type_node,
24418 V4HI_type_node, V4HI_type_node, NULL_TREE);
24419 tree v4hi_ftype_v2si_v2si
24420 = build_function_type_list (V4HI_type_node,
24421 V2SI_type_node, V2SI_type_node, NULL_TREE);
24422 tree v8qi_ftype_v4hi_v8qi
24423 = build_function_type_list (V8QI_type_node,
24424 V4HI_type_node, V8QI_type_node, NULL_TREE);
24425 tree v2si_ftype_v4hi_v4hi
24426 = build_function_type_list (V2SI_type_node,
24427 V4HI_type_node, V4HI_type_node, NULL_TREE);
24428 tree v2si_ftype_v8qi_v8qi
24429 = build_function_type_list (V2SI_type_node,
24430 V8QI_type_node, V8QI_type_node, NULL_TREE);
24431 tree v4hi_ftype_v4hi_di
24432 = build_function_type_list (V4HI_type_node,
24433 V4HI_type_node, long_long_integer_type_node,
24434 NULL_TREE);
24435 tree v2si_ftype_v2si_di
24436 = build_function_type_list (V2SI_type_node,
24437 V2SI_type_node, long_long_integer_type_node,
24438 NULL_TREE);
24439 tree di_ftype_void
24440 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24441 tree int_ftype_void
24442 = build_function_type_list (integer_type_node, NULL_TREE);
24443 tree di_ftype_v8qi
24444 = build_function_type_list (long_long_integer_type_node,
24445 V8QI_type_node, NULL_TREE);
24446 tree di_ftype_v4hi
24447 = build_function_type_list (long_long_integer_type_node,
24448 V4HI_type_node, NULL_TREE);
24449 tree di_ftype_v2si
24450 = build_function_type_list (long_long_integer_type_node,
24451 V2SI_type_node, NULL_TREE);
24452 tree v2si_ftype_v4hi
24453 = build_function_type_list (V2SI_type_node,
24454 V4HI_type_node, NULL_TREE);
24455 tree v4hi_ftype_v8qi
24456 = build_function_type_list (V4HI_type_node,
24457 V8QI_type_node, NULL_TREE);
24458 tree v8qi_ftype_v8qi
24459 = build_function_type_list (V8QI_type_node,
24460 V8QI_type_node, NULL_TREE);
24461 tree v4hi_ftype_v4hi
24462 = build_function_type_list (V4HI_type_node,
24463 V4HI_type_node, NULL_TREE);
24464 tree v2si_ftype_v2si
24465 = build_function_type_list (V2SI_type_node,
24466 V2SI_type_node, NULL_TREE);
24468 tree di_ftype_di_v4hi_v4hi
24469 = build_function_type_list (long_long_unsigned_type_node,
24470 long_long_unsigned_type_node,
24471 V4HI_type_node, V4HI_type_node,
24472 NULL_TREE);
24474 tree di_ftype_v4hi_v4hi
24475 = build_function_type_list (long_long_unsigned_type_node,
24476 V4HI_type_node, V4HI_type_node,
24477 NULL_TREE);
24479 tree v2si_ftype_v2si_v4hi_v4hi
24480 = build_function_type_list (V2SI_type_node,
24481 V2SI_type_node, V4HI_type_node,
24482 V4HI_type_node, NULL_TREE);
24484 tree v2si_ftype_v2si_v8qi_v8qi
24485 = build_function_type_list (V2SI_type_node,
24486 V2SI_type_node, V8QI_type_node,
24487 V8QI_type_node, NULL_TREE);
24489 tree di_ftype_di_v2si_v2si
24490 = build_function_type_list (long_long_unsigned_type_node,
24491 long_long_unsigned_type_node,
24492 V2SI_type_node, V2SI_type_node,
24493 NULL_TREE);
24495 tree di_ftype_di_di_int
24496 = build_function_type_list (long_long_unsigned_type_node,
24497 long_long_unsigned_type_node,
24498 long_long_unsigned_type_node,
24499 integer_type_node, NULL_TREE);
24501 tree void_ftype_int
24502 = build_function_type_list (void_type_node,
24503 integer_type_node, NULL_TREE);
24505 tree v8qi_ftype_char
24506 = build_function_type_list (V8QI_type_node,
24507 signed_char_type_node, NULL_TREE);
24509 tree v4hi_ftype_short
24510 = build_function_type_list (V4HI_type_node,
24511 short_integer_type_node, NULL_TREE);
24513 tree v2si_ftype_int
24514 = build_function_type_list (V2SI_type_node,
24515 integer_type_node, NULL_TREE);
24517 /* Normal vector binops. */
24518 tree v8qi_ftype_v8qi_v8qi
24519 = build_function_type_list (V8QI_type_node,
24520 V8QI_type_node, V8QI_type_node, NULL_TREE);
24521 tree v4hi_ftype_v4hi_v4hi
24522 = build_function_type_list (V4HI_type_node,
24523 V4HI_type_node, V4HI_type_node, NULL_TREE);
24524 tree v2si_ftype_v2si_v2si
24525 = build_function_type_list (V2SI_type_node,
24526 V2SI_type_node, V2SI_type_node, NULL_TREE);
24527 tree di_ftype_di_di
24528 = build_function_type_list (long_long_unsigned_type_node,
24529 long_long_unsigned_type_node,
24530 long_long_unsigned_type_node,
24531 NULL_TREE);
24533 /* Add all builtins that are more or less simple operations on two
24534 operands. */
24535 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24537 /* Use one of the operands; the target can have a different mode for
24538 mask-generating compares. */
24539 enum machine_mode mode;
24540 tree type;
24542 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24543 continue;
24545 mode = insn_data[d->icode].operand[1].mode;
24547 switch (mode)
24549 case V8QImode:
24550 type = v8qi_ftype_v8qi_v8qi;
24551 break;
24552 case V4HImode:
24553 type = v4hi_ftype_v4hi_v4hi;
24554 break;
24555 case V2SImode:
24556 type = v2si_ftype_v2si_v2si;
24557 break;
24558 case DImode:
24559 type = di_ftype_di_di;
24560 break;
24562 default:
24563 gcc_unreachable ();
24566 def_mbuiltin (d->mask, d->name, type, d->code);
24569 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24570 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24572 ARM_BUILTIN_ ## CODE)
24574 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24575 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24576 ARM_BUILTIN_ ## CODE)
24578 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24579 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24580 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24581 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24582 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24583 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24584 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24585 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24586 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24588 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24589 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24590 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24591 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24592 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24593 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24595 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24596 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24597 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24598 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24599 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24600 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24602 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24603 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24604 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24605 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24606 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24607 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24609 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24610 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24611 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24612 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24613 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24614 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24616 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24618 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24619 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24620 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24621 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24622 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24623 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24624 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24625 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24626 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24627 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24629 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24630 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24631 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24632 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24633 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24634 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24635 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24636 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24637 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24639 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24640 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24641 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24643 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24644 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24645 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24647 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24648 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24650 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24651 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24652 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24653 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24654 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24655 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24657 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24658 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24659 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24660 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24661 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24662 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24663 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24664 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24665 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24666 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24667 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24668 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24670 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24671 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24672 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24673 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24675 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24676 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24677 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24678 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24679 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24680 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24681 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24683 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24684 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24685 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24687 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24688 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24689 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24690 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24692 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24693 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24694 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24695 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24697 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24698 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24699 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24700 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24702 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24703 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24704 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24705 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24707 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24708 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24709 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24710 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24712 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24713 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24714 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24715 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24717 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24719 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24720 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24721 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24723 #undef iwmmx_mbuiltin
24724 #undef iwmmx2_mbuiltin
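/* A hedged user-level sketch of two of the builtins registered above
   (__builtin_arm_getwcgr0 has type int (void) and __builtin_arm_setwcgr0
   void (int), per int_ftype_void / void_ftype_int).  This is user code for
   an iWMMXt-enabled target, not part of this file:

     int
     exchange_wcgr0 (int n)
     {
       int old = __builtin_arm_getwcgr0 ();
       __builtin_arm_setwcgr0 (n);
       return old;
     }
 */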
24727 static void
24728 arm_init_fp16_builtins (void)
24730 tree fp16_type = make_node (REAL_TYPE);
24731 TYPE_PRECISION (fp16_type) = 16;
24732 layout_type (fp16_type);
24733 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
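/* Once __fp16 is registered (only when arm_fp16_format is set, e.g. via
   -mfp16-format=ieee), user code can use it as a storage type; the hooks
   below reject __fp16 parameters and return values and promote it to float
   for arithmetic.  A hedged sketch of typical use:

     float
     load_and_double (const __fp16 *p)
     {
       __fp16 h = *p;
       return h + h;
     }
 */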
24736 static void
24737 arm_init_crc32_builtins ()
24739 tree si_ftype_si_qi
24740 = build_function_type_list (unsigned_intSI_type_node,
24741 unsigned_intSI_type_node,
24742 unsigned_intQI_type_node, NULL_TREE);
24743 tree si_ftype_si_hi
24744 = build_function_type_list (unsigned_intSI_type_node,
24745 unsigned_intSI_type_node,
24746 unsigned_intHI_type_node, NULL_TREE);
24747 tree si_ftype_si_si
24748 = build_function_type_list (unsigned_intSI_type_node,
24749 unsigned_intSI_type_node,
24750 unsigned_intSI_type_node, NULL_TREE);
24752 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24753 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24754 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24755 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24756 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24757 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24758 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24759 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24760 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24761 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24762 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24763 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24764 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24765 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24766 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24767 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24768 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24769 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
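/* A hedged usage sketch for the builtins registered above (user code;
   requires a target with the CRC32 extension, e.g. -march=armv8-a+crc):

     unsigned int
     crc32_byte (unsigned int crc, unsigned char b)
     {
       return __builtin_arm_crc32b (crc, b);
     }

   crc32h and crc32w take a 16-bit and a 32-bit data argument respectively,
   and the crc32c* variants use the Castagnoli polynomial.  */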
24772 static void
24773 arm_init_builtins (void)
24775 if (TARGET_REALLY_IWMMXT)
24776 arm_init_iwmmxt_builtins ();
24778 if (TARGET_NEON)
24779 arm_init_neon_builtins ();
24781 if (arm_fp16_format)
24782 arm_init_fp16_builtins ();
24784 if (TARGET_CRC32)
24785 arm_init_crc32_builtins ();
24787 if (TARGET_VFP && TARGET_HARD_FLOAT)
24789 tree ftype_set_fpscr
24790 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24791 tree ftype_get_fpscr
24792 = build_function_type_list (unsigned_type_node, NULL);
24794 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24795 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24796 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24797 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24798 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24799 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
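/* A hedged sketch of how the two FPSCR builtins registered above are
   typically paired in user code on a VFP, hard-float target.  FPSCR bits
   23:22 select the rounding mode, with 0b11 meaning round towards zero
   (per the ARM architecture manual; shown only as an illustration):

     unsigned int saved = __builtin_arm_ldfscr ();
     __builtin_arm_stfscr (saved | (3u << 22));
     ...
     __builtin_arm_stfscr (saved);
 */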
24803 /* Return the ARM builtin for CODE. */
24805 static tree
24806 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24808 if (code >= ARM_BUILTIN_MAX)
24809 return error_mark_node;
24811 return arm_builtin_decls[code];
24814 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24816 static const char *
24817 arm_invalid_parameter_type (const_tree t)
24819 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24820 return N_("function parameters cannot have __fp16 type");
24821 return NULL;
24824 /* Implement TARGET_INVALID_RETURN_TYPE. */
24826 static const char *
24827 arm_invalid_return_type (const_tree t)
24829 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24830 return N_("functions cannot return __fp16 type");
24831 return NULL;
24834 /* Implement TARGET_PROMOTED_TYPE. */
24836 static tree
24837 arm_promoted_type (const_tree t)
24839 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24840 return float_type_node;
24841 return NULL_TREE;
24844 /* Implement TARGET_CONVERT_TO_TYPE.
24845 Specifically, this hook implements the peculiarity of the ARM
24846 half-precision floating-point C semantics that requires conversions
24847 between __fp16 and double to go through an intermediate conversion to float. */
24849 static tree
24850 arm_convert_to_type (tree type, tree expr)
24852 tree fromtype = TREE_TYPE (expr);
24853 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24854 return NULL_TREE;
24855 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24856 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24857 return convert (type, convert (float_type_node, expr));
24858 return NULL_TREE;
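/* A worked example of the rule above: converting an __fp16 value to double
   (or vice versa) is not done directly; the hook rewrites it as two
   conversions through float, so (double) h effectively becomes
   (double) (float) h.  Only the 16-bit <-> wider-than-32-bit cases are
   affected; __fp16 <-> float conversions are left to the normal
   machinery.  */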
24861 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24862 This simply adds HFmode as a supported mode; even though we don't
24863 implement arithmetic on this type directly, it's supported by
24864 optabs conversions, much the way the double-word arithmetic is
24865 special-cased in the default hook. */
24867 static bool
24868 arm_scalar_mode_supported_p (enum machine_mode mode)
24870 if (mode == HFmode)
24871 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24872 else if (ALL_FIXED_POINT_MODE_P (mode))
24873 return true;
24874 else
24875 return default_scalar_mode_supported_p (mode);
24878 /* Errors in the source file can cause expand_expr to return const0_rtx
24879 where we expect a vector. To avoid crashing, use one of the vector
24880 clear instructions. */
24882 static rtx
24883 safe_vector_operand (rtx x, enum machine_mode mode)
24885 if (x != const0_rtx)
24886 return x;
24887 x = gen_reg_rtx (mode);
24889 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24890 : gen_rtx_SUBREG (DImode, x, 0)));
24891 return x;
24894 /* Function to expand ternary builtins. */
24895 static rtx
24896 arm_expand_ternop_builtin (enum insn_code icode,
24897 tree exp, rtx target)
24899 rtx pat;
24900 tree arg0 = CALL_EXPR_ARG (exp, 0);
24901 tree arg1 = CALL_EXPR_ARG (exp, 1);
24902 tree arg2 = CALL_EXPR_ARG (exp, 2);
24904 rtx op0 = expand_normal (arg0);
24905 rtx op1 = expand_normal (arg1);
24906 rtx op2 = expand_normal (arg2);
24907 rtx op3 = NULL_RTX;
24909 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24910 lane operand depending on endianness. */
24911 bool builtin_sha1cpm_p = false;
24913 if (insn_data[icode].n_operands == 5)
24915 gcc_assert (icode == CODE_FOR_crypto_sha1c
24916 || icode == CODE_FOR_crypto_sha1p
24917 || icode == CODE_FOR_crypto_sha1m);
24918 builtin_sha1cpm_p = true;
24920 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24921 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24922 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24923 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24926 if (VECTOR_MODE_P (mode0))
24927 op0 = safe_vector_operand (op0, mode0);
24928 if (VECTOR_MODE_P (mode1))
24929 op1 = safe_vector_operand (op1, mode1);
24930 if (VECTOR_MODE_P (mode2))
24931 op2 = safe_vector_operand (op2, mode2);
24933 if (! target
24934 || GET_MODE (target) != tmode
24935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24936 target = gen_reg_rtx (tmode);
24938 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24939 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24940 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24942 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24943 op0 = copy_to_mode_reg (mode0, op0);
24944 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24945 op1 = copy_to_mode_reg (mode1, op1);
24946 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24947 op2 = copy_to_mode_reg (mode2, op2);
24948 if (builtin_sha1cpm_p)
24949 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24951 if (builtin_sha1cpm_p)
24952 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24953 else
24954 pat = GEN_FCN (icode) (target, op0, op1, op2);
24955 if (! pat)
24956 return 0;
24957 emit_insn (pat);
24958 return target;
24961 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24963 static rtx
24964 arm_expand_binop_builtin (enum insn_code icode,
24965 tree exp, rtx target)
24967 rtx pat;
24968 tree arg0 = CALL_EXPR_ARG (exp, 0);
24969 tree arg1 = CALL_EXPR_ARG (exp, 1);
24970 rtx op0 = expand_normal (arg0);
24971 rtx op1 = expand_normal (arg1);
24972 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24973 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24974 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24976 if (VECTOR_MODE_P (mode0))
24977 op0 = safe_vector_operand (op0, mode0);
24978 if (VECTOR_MODE_P (mode1))
24979 op1 = safe_vector_operand (op1, mode1);
24981 if (! target
24982 || GET_MODE (target) != tmode
24983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24984 target = gen_reg_rtx (tmode);
24986 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24987 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24989 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24990 op0 = copy_to_mode_reg (mode0, op0);
24991 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24992 op1 = copy_to_mode_reg (mode1, op1);
24994 pat = GEN_FCN (icode) (target, op0, op1);
24995 if (! pat)
24996 return 0;
24997 emit_insn (pat);
24998 return target;
25001 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25003 static rtx
25004 arm_expand_unop_builtin (enum insn_code icode,
25005 tree exp, rtx target, int do_load)
25007 rtx pat;
25008 tree arg0 = CALL_EXPR_ARG (exp, 0);
25009 rtx op0 = expand_normal (arg0);
25010 rtx op1 = NULL_RTX;
25011 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25012 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25013 bool builtin_sha1h_p = false;
25015 if (insn_data[icode].n_operands == 3)
25017 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25018 builtin_sha1h_p = true;
25021 if (! target
25022 || GET_MODE (target) != tmode
25023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25024 target = gen_reg_rtx (tmode);
25025 if (do_load)
25026 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25027 else
25029 if (VECTOR_MODE_P (mode0))
25030 op0 = safe_vector_operand (op0, mode0);
25032 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25033 op0 = copy_to_mode_reg (mode0, op0);
25035 if (builtin_sha1h_p)
25036 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25038 if (builtin_sha1h_p)
25039 pat = GEN_FCN (icode) (target, op0, op1);
25040 else
25041 pat = GEN_FCN (icode) (target, op0);
25042 if (! pat)
25043 return 0;
25044 emit_insn (pat);
25045 return target;
25048 typedef enum {
25049 NEON_ARG_COPY_TO_REG,
25050 NEON_ARG_CONSTANT,
25051 NEON_ARG_MEMORY,
25052 NEON_ARG_STOP
25053 } builtin_arg;
25055 #define NEON_MAX_BUILTIN_ARGS 5
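/* arm_expand_neon_args (below) consumes one NEON_ARG_* code per operand
   from its variable argument list, terminated by NEON_ARG_STOP.  A hedged
   sketch of the calling convention, as a caller handling a simple binary
   operation with a result might use it (parameter names follow the
   function below; the actual call sites live in arm_expand_neon_builtin):

     return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
                                  NEON_ARG_COPY_TO_REG,
                                  NEON_ARG_COPY_TO_REG,
                                  NEON_ARG_STOP);
 */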
25057 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25058 and return an expression for the accessed memory.
25060 The intrinsic function operates on a block of registers that has
25061 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25062 function references the memory at EXP of type TYPE and in mode
25063 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25064 available. */
25066 static tree
25067 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25068 enum machine_mode reg_mode,
25069 neon_builtin_type_mode type_mode)
25071 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25072 tree elem_type, upper_bound, array_type;
25074 /* Work out the size of the register block in bytes. */
25075 reg_size = GET_MODE_SIZE (reg_mode);
25077 /* Work out the size of each vector in bytes. */
25078 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25079 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25081 /* Work out how many vectors there are. */
25082 gcc_assert (reg_size % vector_size == 0);
25083 nvectors = reg_size / vector_size;
25085 /* Work out the type of each element. */
25086 gcc_assert (POINTER_TYPE_P (type));
25087 elem_type = TREE_TYPE (type);
25089 /* Work out how many elements are being loaded or stored.
25090 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25091 and memory elements; anything else implies a lane load or store. */
25092 if (mem_mode == reg_mode)
25093 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25094 else
25095 nelems = nvectors;
25097 /* Create a type that describes the full access. */
25098 upper_bound = build_int_cst (size_type_node, nelems - 1);
25099 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25101 /* Dereference EXP using that type. */
25102 return fold_build2 (MEM_REF, array_type, exp,
25103 build_int_cst (build_pointer_type (array_type), 0));
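/* A worked example of the size computation above: for a full-width load of
   one quadword vector of 32-bit elements, REG_MODE and MEM_MODE are both
   V4SImode, so reg_size = 16, vector_size = 16 (a quadword type) and
   nvectors = 1; with a 4-byte element type this gives
   nelems = 16 * 1 / 4 = 4, and EXP is dereferenced as an array of four
   elements.  For a lane access (MEM_MODE != REG_MODE), nelems is simply
   nvectors, i.e. one element per vector in the block.  */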
25106 /* Expand a Neon builtin. */
25107 static rtx
25108 arm_expand_neon_args (rtx target, int icode, int have_retval,
25109 neon_builtin_type_mode type_mode,
25110 tree exp, int fcode, ...)
25112 va_list ap;
25113 rtx pat;
25114 tree arg[NEON_MAX_BUILTIN_ARGS];
25115 rtx op[NEON_MAX_BUILTIN_ARGS];
25116 tree arg_type;
25117 tree formals;
25118 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25119 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25120 enum machine_mode other_mode;
25121 int argc = 0;
25122 int opno;
25124 if (have_retval
25125 && (!target
25126 || GET_MODE (target) != tmode
25127 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25128 target = gen_reg_rtx (tmode);
25130 va_start (ap, fcode);
25132 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25134 for (;;)
25136 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25138 if (thisarg == NEON_ARG_STOP)
25139 break;
25140 else
25142 opno = argc + have_retval;
25143 mode[argc] = insn_data[icode].operand[opno].mode;
25144 arg[argc] = CALL_EXPR_ARG (exp, argc);
25145 arg_type = TREE_VALUE (formals);
25146 if (thisarg == NEON_ARG_MEMORY)
25148 other_mode = insn_data[icode].operand[1 - opno].mode;
25149 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25150 mode[argc], other_mode,
25151 type_mode);
25154 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25155 is returned.  */
25156 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25157 (thisarg == NEON_ARG_MEMORY
25158 ? EXPAND_MEMORY : EXPAND_NORMAL));
25160 switch (thisarg)
25162 case NEON_ARG_COPY_TO_REG:
25163 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25164 if (!(*insn_data[icode].operand[opno].predicate)
25165 (op[argc], mode[argc]))
25166 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25167 break;
25169 case NEON_ARG_CONSTANT:
25170 /* FIXME: This error message is somewhat unhelpful. */
25171 if (!(*insn_data[icode].operand[opno].predicate)
25172 (op[argc], mode[argc]))
25173 error ("argument must be a constant");
25174 break;
25176 case NEON_ARG_MEMORY:
25177 /* Check if expand failed. */
25178 if (op[argc] == const0_rtx)
25179 return 0;
25180 gcc_assert (MEM_P (op[argc]));
25181 PUT_MODE (op[argc], mode[argc]);
25182 /* ??? arm_neon.h uses the same built-in functions for signed
25183 and unsigned accesses, casting where necessary. This isn't
25184 alias safe. */
25185 set_mem_alias_set (op[argc], 0);
25186 if (!(*insn_data[icode].operand[opno].predicate)
25187 (op[argc], mode[argc]))
25188 op[argc] = (replace_equiv_address
25189 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25190 break;
25192 case NEON_ARG_STOP:
25193 gcc_unreachable ();
25196 argc++;
25197 formals = TREE_CHAIN (formals);
25201 va_end (ap);
25203 if (have_retval)
25204 switch (argc)
25206 case 1:
25207 pat = GEN_FCN (icode) (target, op[0]);
25208 break;
25210 case 2:
25211 pat = GEN_FCN (icode) (target, op[0], op[1]);
25212 break;
25214 case 3:
25215 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25216 break;
25218 case 4:
25219 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25220 break;
25222 case 5:
25223 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25224 break;
25226 default:
25227 gcc_unreachable ();
25229 else
25230 switch (argc)
25232 case 1:
25233 pat = GEN_FCN (icode) (op[0]);
25234 break;
25236 case 2:
25237 pat = GEN_FCN (icode) (op[0], op[1]);
25238 break;
25240 case 3:
25241 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25242 break;
25244 case 4:
25245 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25246 break;
25248 case 5:
25249 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25250 break;
25252 default:
25253 gcc_unreachable ();
25256 if (!pat)
25257 return 0;
25259 emit_insn (pat);
25261 return target;
25264 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25265 constants defined per-instruction or per instruction-variant. Instead, the
25266 required info is looked up in the table neon_builtin_data. */
25267 static rtx
25268 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25270 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25271 neon_itype itype = d->itype;
25272 enum insn_code icode = d->code;
25273 neon_builtin_type_mode type_mode = d->mode;
25275 switch (itype)
25277 case NEON_UNOP:
25278 case NEON_CONVERT:
25279 case NEON_DUPLANE:
25280 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25281 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25283 case NEON_BINOP:
25284 case NEON_SETLANE:
25285 case NEON_SCALARMUL:
25286 case NEON_SCALARMULL:
25287 case NEON_SCALARMULH:
25288 case NEON_SHIFTINSERT:
25289 case NEON_LOGICBINOP:
25290 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25291 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25292 NEON_ARG_STOP);
25294 case NEON_TERNOP:
25295 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25296 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25297 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25299 case NEON_GETLANE:
25300 case NEON_FIXCONV:
25301 case NEON_SHIFTIMM:
25302 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25303 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25304 NEON_ARG_STOP);
25306 case NEON_CREATE:
25307 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25308 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25310 case NEON_DUP:
25311 case NEON_RINT:
25312 case NEON_SPLIT:
25313 case NEON_FLOAT_WIDEN:
25314 case NEON_FLOAT_NARROW:
25315 case NEON_BSWAP:
25316 case NEON_REINTERP:
25317 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25318 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25320 case NEON_COMBINE:
25321 case NEON_VTBL:
25322 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25323 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25325 case NEON_LANEMUL:
25326 case NEON_LANEMULL:
25327 case NEON_LANEMULH:
25328 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25329 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25330 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25332 case NEON_LANEMAC:
25333 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25334 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25335 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25337 case NEON_SHIFTACC:
25338 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25339 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25340 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25342 case NEON_SCALARMAC:
25343 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25344 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25345 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25347 case NEON_SELECT:
25348 case NEON_VTBX:
25349 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25350 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25351 NEON_ARG_STOP);
25353 case NEON_LOAD1:
25354 case NEON_LOADSTRUCT:
25355 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25356 NEON_ARG_MEMORY, NEON_ARG_STOP);
25358 case NEON_LOAD1LANE:
25359 case NEON_LOADSTRUCTLANE:
25360 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25361 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25362 NEON_ARG_STOP);
25364 case NEON_STORE1:
25365 case NEON_STORESTRUCT:
25366 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25367 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25369 case NEON_STORE1LANE:
25370 case NEON_STORESTRUCTLANE:
25371 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25372 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25373 NEON_ARG_STOP);
25376 gcc_unreachable ();
25379 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25380 void
25381 neon_reinterpret (rtx dest, rtx src)
25383 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25386 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25387 not to early-clobber SRC registers in the process.
25389 We assume that the operands described by SRC and DEST represent a
25390 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25391 number of components into which the copy has been decomposed. */
25392 void
25393 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25395 unsigned int i;
25397 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25398 || REGNO (operands[0]) < REGNO (operands[1]))
25400 for (i = 0; i < count; i++)
25402 operands[2 * i] = dest[i];
25403 operands[2 * i + 1] = src[i];
25406 else
25408 for (i = 0; i < count; i++)
25410 operands[2 * i] = dest[count - i - 1];
25411 operands[2 * i + 1] = src[count - i - 1];
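/* Editor's note, an illustrative case: if the decomposed copy is
   dest = { d1, d2 }, src = { d0, d1 } (the blocks overlap and
   REGNO (operands[0]) > REGNO (operands[1])), the forward order would
   clobber d1 before it is read, so the reversed loop produces

     operands[0] = d2, operands[1] = d1   (emitted first)
     operands[2] = d1, operands[3] = d0   (emitted second)

   The register names here are purely for illustration.  */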
25416 /* Split operands into moves from op[1] + op[2] into op[0]. */
25418 void
25419 neon_split_vcombine (rtx operands[3])
25421 unsigned int dest = REGNO (operands[0]);
25422 unsigned int src1 = REGNO (operands[1]);
25423 unsigned int src2 = REGNO (operands[2]);
25424 enum machine_mode halfmode = GET_MODE (operands[1]);
25425 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25426 rtx destlo, desthi;
25428 if (src1 == dest && src2 == dest + halfregs)
25430 /* No-op move. Can't split to nothing; emit something. */
25431 emit_note (NOTE_INSN_DELETED);
25432 return;
25435 /* Preserve register attributes for variable tracking. */
25436 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25437 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25438 GET_MODE_SIZE (halfmode));
25440 /* Special case of reversed high/low parts. Use VSWP. */
25441 if (src2 == dest && src1 == dest + halfregs)
25443 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25444 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25445 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25446 return;
25449 if (!reg_overlap_mentioned_p (operands[2], destlo))
25451 /* Try to avoid unnecessary moves if part of the result
25452 is in the right place already. */
25453 if (src1 != dest)
25454 emit_move_insn (destlo, operands[1]);
25455 if (src2 != dest + halfregs)
25456 emit_move_insn (desthi, operands[2]);
25458 else
25460 if (src2 != dest + halfregs)
25461 emit_move_insn (desthi, operands[2]);
25462 if (src1 != dest)
25463 emit_move_insn (destlo, operands[1]);
25467 /* Expand an expression EXP that calls a built-in function,
25468 with result going to TARGET if that's convenient
25469 (and in mode MODE if that's convenient).
25470 SUBTARGET may be used as the target for computing one of EXP's operands.
25471 IGNORE is nonzero if the value is to be ignored. */
25473 static rtx
25474 arm_expand_builtin (tree exp,
25475 rtx target,
25476 rtx subtarget ATTRIBUTE_UNUSED,
25477 enum machine_mode mode ATTRIBUTE_UNUSED,
25478 int ignore ATTRIBUTE_UNUSED)
25480 const struct builtin_description * d;
25481 enum insn_code icode;
25482 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25483 tree arg0;
25484 tree arg1;
25485 tree arg2;
25486 rtx op0;
25487 rtx op1;
25488 rtx op2;
25489 rtx pat;
25490 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25491 size_t i;
25492 enum machine_mode tmode;
25493 enum machine_mode mode0;
25494 enum machine_mode mode1;
25495 enum machine_mode mode2;
25496 int opint;
25497 int selector;
25498 int mask;
25499 int imm;
25501 if (fcode >= ARM_BUILTIN_NEON_BASE)
25502 return arm_expand_neon_builtin (fcode, exp, target);
25504 switch (fcode)
25506 case ARM_BUILTIN_GET_FPSCR:
25507 case ARM_BUILTIN_SET_FPSCR:
25508 if (fcode == ARM_BUILTIN_GET_FPSCR)
25510 icode = CODE_FOR_get_fpscr;
25511 target = gen_reg_rtx (SImode);
25512 pat = GEN_FCN (icode) (target);
25514 else
25516 target = NULL_RTX;
25517 icode = CODE_FOR_set_fpscr;
25518 arg0 = CALL_EXPR_ARG (exp, 0);
25519 op0 = expand_normal (arg0);
25520 pat = GEN_FCN (icode) (op0);
25522 emit_insn (pat);
25523 return target;
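      /* Editor's note: these two cases implement the FPSCR access
	 built-ins, which user code can invoke roughly as

	   unsigned int fpscr = __builtin_arm_get_fpscr ();
	   __builtin_arm_set_fpscr (fpscr & ~0x1fu);

	 The mask shown is only an example (clearing the low cumulative
	 exception flag bits); check the FPSCR layout before relying on
	 particular bit positions.  */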
25525 case ARM_BUILTIN_TEXTRMSB:
25526 case ARM_BUILTIN_TEXTRMUB:
25527 case ARM_BUILTIN_TEXTRMSH:
25528 case ARM_BUILTIN_TEXTRMUH:
25529 case ARM_BUILTIN_TEXTRMSW:
25530 case ARM_BUILTIN_TEXTRMUW:
25531 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25532 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25533 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25534 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25535 : CODE_FOR_iwmmxt_textrmw);
25537 arg0 = CALL_EXPR_ARG (exp, 0);
25538 arg1 = CALL_EXPR_ARG (exp, 1);
25539 op0 = expand_normal (arg0);
25540 op1 = expand_normal (arg1);
25541 tmode = insn_data[icode].operand[0].mode;
25542 mode0 = insn_data[icode].operand[1].mode;
25543 mode1 = insn_data[icode].operand[2].mode;
25545 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25546 op0 = copy_to_mode_reg (mode0, op0);
25547 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25549 /* @@@ better error message */
25550 error ("selector must be an immediate");
25551 return gen_reg_rtx (tmode);
25554 opint = INTVAL (op1);
25555 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25557 if (opint > 7 || opint < 0)
25558 error ("the range of selector should be in 0 to 7");
25560 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25562 if (opint > 3 || opint < 0)
25563 error ("the range of selector should be in 0 to 3");
25565 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25567 if (opint > 1 || opint < 0)
25568 error ("the range of selector should be in 0 to 1");
25571 if (target == 0
25572 || GET_MODE (target) != tmode
25573 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25574 target = gen_reg_rtx (tmode);
25575 pat = GEN_FCN (icode) (target, op0, op1);
25576 if (! pat)
25577 return 0;
25578 emit_insn (pat);
25579 return target;
25581 case ARM_BUILTIN_WALIGNI:
25582 /* If op2 is immediate, call waligni, else call walignr.  */
25583 arg0 = CALL_EXPR_ARG (exp, 0);
25584 arg1 = CALL_EXPR_ARG (exp, 1);
25585 arg2 = CALL_EXPR_ARG (exp, 2);
25586 op0 = expand_normal (arg0);
25587 op1 = expand_normal (arg1);
25588 op2 = expand_normal (arg2);
25589 if (CONST_INT_P (op2))
25591 icode = CODE_FOR_iwmmxt_waligni;
25592 tmode = insn_data[icode].operand[0].mode;
25593 mode0 = insn_data[icode].operand[1].mode;
25594 mode1 = insn_data[icode].operand[2].mode;
25595 mode2 = insn_data[icode].operand[3].mode;
25596 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25597 op0 = copy_to_mode_reg (mode0, op0);
25598 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25599 op1 = copy_to_mode_reg (mode1, op1);
25600 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25601 selector = INTVAL (op2);
25602 if (selector > 7 || selector < 0)
25603 error ("the range of selector should be in 0 to 7");
25605 else
25607 icode = CODE_FOR_iwmmxt_walignr;
25608 tmode = insn_data[icode].operand[0].mode;
25609 mode0 = insn_data[icode].operand[1].mode;
25610 mode1 = insn_data[icode].operand[2].mode;
25611 mode2 = insn_data[icode].operand[3].mode;
25612 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25613 op0 = copy_to_mode_reg (mode0, op0);
25614 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25615 op1 = copy_to_mode_reg (mode1, op1);
25616 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25617 op2 = copy_to_mode_reg (mode2, op2);
25619 if (target == 0
25620 || GET_MODE (target) != tmode
25621 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25622 target = gen_reg_rtx (tmode);
25623 pat = GEN_FCN (icode) (target, op0, op1, op2);
25624 if (!pat)
25625 return 0;
25626 emit_insn (pat);
25627 return target;
25629 case ARM_BUILTIN_TINSRB:
25630 case ARM_BUILTIN_TINSRH:
25631 case ARM_BUILTIN_TINSRW:
25632 case ARM_BUILTIN_WMERGE:
25633 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25634 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25635 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25636 : CODE_FOR_iwmmxt_tinsrw);
25637 arg0 = CALL_EXPR_ARG (exp, 0);
25638 arg1 = CALL_EXPR_ARG (exp, 1);
25639 arg2 = CALL_EXPR_ARG (exp, 2);
25640 op0 = expand_normal (arg0);
25641 op1 = expand_normal (arg1);
25642 op2 = expand_normal (arg2);
25643 tmode = insn_data[icode].operand[0].mode;
25644 mode0 = insn_data[icode].operand[1].mode;
25645 mode1 = insn_data[icode].operand[2].mode;
25646 mode2 = insn_data[icode].operand[3].mode;
25648 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25649 op0 = copy_to_mode_reg (mode0, op0);
25650 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25651 op1 = copy_to_mode_reg (mode1, op1);
25652 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25654 error ("selector must be an immediate");
25655 return const0_rtx;
25657 if (icode == CODE_FOR_iwmmxt_wmerge)
25659 selector = INTVAL (op2);
25660 if (selector > 7 || selector < 0)
25661 error ("the range of selector should be in 0 to 7");
25663 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25664 || (icode == CODE_FOR_iwmmxt_tinsrh)
25665 || (icode == CODE_FOR_iwmmxt_tinsrw))
25667 mask = 0x01;
25668 selector= INTVAL (op2);
25669 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25670 error ("the range of selector should be in 0 to 7");
25671 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25672 error ("the range of selector should be in 0 to 3");
25673 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25674 error ("the range of selector should be in 0 to 1");
25675 mask <<= selector;
25676 op2 = GEN_INT (mask);
25678 if (target == 0
25679 || GET_MODE (target) != tmode
25680 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25681 target = gen_reg_rtx (tmode);
25682 pat = GEN_FCN (icode) (target, op0, op1, op2);
25683 if (! pat)
25684 return 0;
25685 emit_insn (pat);
25686 return target;
25688 case ARM_BUILTIN_SETWCGR0:
25689 case ARM_BUILTIN_SETWCGR1:
25690 case ARM_BUILTIN_SETWCGR2:
25691 case ARM_BUILTIN_SETWCGR3:
25692 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25693 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25694 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25695 : CODE_FOR_iwmmxt_setwcgr3);
25696 arg0 = CALL_EXPR_ARG (exp, 0);
25697 op0 = expand_normal (arg0);
25698 mode0 = insn_data[icode].operand[0].mode;
25699 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25700 op0 = copy_to_mode_reg (mode0, op0);
25701 pat = GEN_FCN (icode) (op0);
25702 if (!pat)
25703 return 0;
25704 emit_insn (pat);
25705 return 0;
25707 case ARM_BUILTIN_GETWCGR0:
25708 case ARM_BUILTIN_GETWCGR1:
25709 case ARM_BUILTIN_GETWCGR2:
25710 case ARM_BUILTIN_GETWCGR3:
25711 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25712 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25713 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25714 : CODE_FOR_iwmmxt_getwcgr3);
25715 tmode = insn_data[icode].operand[0].mode;
25716 if (target == 0
25717 || GET_MODE (target) != tmode
25718 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25719 target = gen_reg_rtx (tmode);
25720 pat = GEN_FCN (icode) (target);
25721 if (!pat)
25722 return 0;
25723 emit_insn (pat);
25724 return target;
25726 case ARM_BUILTIN_WSHUFH:
25727 icode = CODE_FOR_iwmmxt_wshufh;
25728 arg0 = CALL_EXPR_ARG (exp, 0);
25729 arg1 = CALL_EXPR_ARG (exp, 1);
25730 op0 = expand_normal (arg0);
25731 op1 = expand_normal (arg1);
25732 tmode = insn_data[icode].operand[0].mode;
25733 mode1 = insn_data[icode].operand[1].mode;
25734 mode2 = insn_data[icode].operand[2].mode;
25736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25737 op0 = copy_to_mode_reg (mode1, op0);
25738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25740 error ("mask must be an immediate");
25741 return const0_rtx;
25743 selector = INTVAL (op1);
25744 if (selector < 0 || selector > 255)
25745 error ("the range of mask should be in 0 to 255");
25746 if (target == 0
25747 || GET_MODE (target) != tmode
25748 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25749 target = gen_reg_rtx (tmode);
25750 pat = GEN_FCN (icode) (target, op0, op1);
25751 if (! pat)
25752 return 0;
25753 emit_insn (pat);
25754 return target;
25756 case ARM_BUILTIN_WMADDS:
25757 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25758 case ARM_BUILTIN_WMADDSX:
25759 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25760 case ARM_BUILTIN_WMADDSN:
25761 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25762 case ARM_BUILTIN_WMADDU:
25763 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25764 case ARM_BUILTIN_WMADDUX:
25765 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25766 case ARM_BUILTIN_WMADDUN:
25767 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25768 case ARM_BUILTIN_WSADBZ:
25769 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25770 case ARM_BUILTIN_WSADHZ:
25771 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25773 /* Several three-argument builtins. */
25774 case ARM_BUILTIN_WMACS:
25775 case ARM_BUILTIN_WMACU:
25776 case ARM_BUILTIN_TMIA:
25777 case ARM_BUILTIN_TMIAPH:
25778 case ARM_BUILTIN_TMIATT:
25779 case ARM_BUILTIN_TMIATB:
25780 case ARM_BUILTIN_TMIABT:
25781 case ARM_BUILTIN_TMIABB:
25782 case ARM_BUILTIN_WQMIABB:
25783 case ARM_BUILTIN_WQMIABT:
25784 case ARM_BUILTIN_WQMIATB:
25785 case ARM_BUILTIN_WQMIATT:
25786 case ARM_BUILTIN_WQMIABBN:
25787 case ARM_BUILTIN_WQMIABTN:
25788 case ARM_BUILTIN_WQMIATBN:
25789 case ARM_BUILTIN_WQMIATTN:
25790 case ARM_BUILTIN_WMIABB:
25791 case ARM_BUILTIN_WMIABT:
25792 case ARM_BUILTIN_WMIATB:
25793 case ARM_BUILTIN_WMIATT:
25794 case ARM_BUILTIN_WMIABBN:
25795 case ARM_BUILTIN_WMIABTN:
25796 case ARM_BUILTIN_WMIATBN:
25797 case ARM_BUILTIN_WMIATTN:
25798 case ARM_BUILTIN_WMIAWBB:
25799 case ARM_BUILTIN_WMIAWBT:
25800 case ARM_BUILTIN_WMIAWTB:
25801 case ARM_BUILTIN_WMIAWTT:
25802 case ARM_BUILTIN_WMIAWBBN:
25803 case ARM_BUILTIN_WMIAWBTN:
25804 case ARM_BUILTIN_WMIAWTBN:
25805 case ARM_BUILTIN_WMIAWTTN:
25806 case ARM_BUILTIN_WSADB:
25807 case ARM_BUILTIN_WSADH:
25808 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25809 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25810 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25811 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25812 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25813 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25814 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25815 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25816 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25817 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25818 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25819 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25820 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25821 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25822 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25823 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25824 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25825 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25826 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25827 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25828 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25829 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25830 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25831 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25832 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25833 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25834 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25835 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25836 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25837 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25838 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25839 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25840 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25841 : CODE_FOR_iwmmxt_wsadh);
25842 arg0 = CALL_EXPR_ARG (exp, 0);
25843 arg1 = CALL_EXPR_ARG (exp, 1);
25844 arg2 = CALL_EXPR_ARG (exp, 2);
25845 op0 = expand_normal (arg0);
25846 op1 = expand_normal (arg1);
25847 op2 = expand_normal (arg2);
25848 tmode = insn_data[icode].operand[0].mode;
25849 mode0 = insn_data[icode].operand[1].mode;
25850 mode1 = insn_data[icode].operand[2].mode;
25851 mode2 = insn_data[icode].operand[3].mode;
25853 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25854 op0 = copy_to_mode_reg (mode0, op0);
25855 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25856 op1 = copy_to_mode_reg (mode1, op1);
25857 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25858 op2 = copy_to_mode_reg (mode2, op2);
25859 if (target == 0
25860 || GET_MODE (target) != tmode
25861 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25862 target = gen_reg_rtx (tmode);
25863 pat = GEN_FCN (icode) (target, op0, op1, op2);
25864 if (! pat)
25865 return 0;
25866 emit_insn (pat);
25867 return target;
25869 case ARM_BUILTIN_WZERO:
25870 target = gen_reg_rtx (DImode);
25871 emit_insn (gen_iwmmxt_clrdi (target));
25872 return target;
25874 case ARM_BUILTIN_WSRLHI:
25875 case ARM_BUILTIN_WSRLWI:
25876 case ARM_BUILTIN_WSRLDI:
25877 case ARM_BUILTIN_WSLLHI:
25878 case ARM_BUILTIN_WSLLWI:
25879 case ARM_BUILTIN_WSLLDI:
25880 case ARM_BUILTIN_WSRAHI:
25881 case ARM_BUILTIN_WSRAWI:
25882 case ARM_BUILTIN_WSRADI:
25883 case ARM_BUILTIN_WRORHI:
25884 case ARM_BUILTIN_WRORWI:
25885 case ARM_BUILTIN_WRORDI:
25886 case ARM_BUILTIN_WSRLH:
25887 case ARM_BUILTIN_WSRLW:
25888 case ARM_BUILTIN_WSRLD:
25889 case ARM_BUILTIN_WSLLH:
25890 case ARM_BUILTIN_WSLLW:
25891 case ARM_BUILTIN_WSLLD:
25892 case ARM_BUILTIN_WSRAH:
25893 case ARM_BUILTIN_WSRAW:
25894 case ARM_BUILTIN_WSRAD:
25895 case ARM_BUILTIN_WRORH:
25896 case ARM_BUILTIN_WRORW:
25897 case ARM_BUILTIN_WRORD:
25898 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25899 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25900 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25901 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25902 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25903 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25904 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25905 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25906 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25907 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25908 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25909 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25910 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25911 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25912 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25913 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25914 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25915 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25916 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25917 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25918 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25919 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25920 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25921 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25922 : CODE_FOR_nothing);
25923 arg1 = CALL_EXPR_ARG (exp, 1);
25924 op1 = expand_normal (arg1);
25925 if (GET_MODE (op1) == VOIDmode)
25927 imm = INTVAL (op1);
25928 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25929 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25930 && (imm < 0 || imm > 32))
25932 if (fcode == ARM_BUILTIN_WRORHI)
25933 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25934 else if (fcode == ARM_BUILTIN_WRORWI)
25935 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25936 else if (fcode == ARM_BUILTIN_WRORH)
25937 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25938 else
25939 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25941 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25942 && (imm < 0 || imm > 64))
25944 if (fcode == ARM_BUILTIN_WRORDI)
25945 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25946 else
25947 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25949 else if (imm < 0)
25951 if (fcode == ARM_BUILTIN_WSRLHI)
25952 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25953 else if (fcode == ARM_BUILTIN_WSRLWI)
25954 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25955 else if (fcode == ARM_BUILTIN_WSRLDI)
25956 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25957 else if (fcode == ARM_BUILTIN_WSLLHI)
25958 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25959 else if (fcode == ARM_BUILTIN_WSLLWI)
25960 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25961 else if (fcode == ARM_BUILTIN_WSLLDI)
25962 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25963 else if (fcode == ARM_BUILTIN_WSRAHI)
25964 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25965 else if (fcode == ARM_BUILTIN_WSRAWI)
25966 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25967 else if (fcode == ARM_BUILTIN_WSRADI)
25968 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25969 else if (fcode == ARM_BUILTIN_WSRLH)
25970 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25971 else if (fcode == ARM_BUILTIN_WSRLW)
25972 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25973 else if (fcode == ARM_BUILTIN_WSRLD)
25974 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25975 else if (fcode == ARM_BUILTIN_WSLLH)
25976 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25977 else if (fcode == ARM_BUILTIN_WSLLW)
25978 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25979 else if (fcode == ARM_BUILTIN_WSLLD)
25980 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25981 else if (fcode == ARM_BUILTIN_WSRAH)
25982 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25983 else if (fcode == ARM_BUILTIN_WSRAW)
25984 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25985 else
25986 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25989 return arm_expand_binop_builtin (icode, exp, target);
25991 default:
25992 break;
25995 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25996 if (d->code == (const enum arm_builtins) fcode)
25997 return arm_expand_binop_builtin (d->icode, exp, target);
25999 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26000 if (d->code == (const enum arm_builtins) fcode)
26001 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26003 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26004 if (d->code == (const enum arm_builtins) fcode)
26005 return arm_expand_ternop_builtin (d->icode, exp, target);
26007 /* @@@ Should really do something sensible here. */
26008 return NULL_RTX;
26011 /* Return the number (counting from 0) of
26012 the least significant set bit in MASK. */
26014 inline static int
26015 number_of_first_bit_set (unsigned mask)
26017 return ctz_hwi (mask);
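/* For example, number_of_first_bit_set (0x28) == 3, since bit 3 is the
   least significant bit set in 0b101000.  */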
26020 /* Like emit_multi_reg_push, but allowing for a different set of
26021 registers to be described as saved. MASK is the set of registers
26022 to be saved; REAL_REGS is the set of registers to be described as
26023 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26025 static rtx
26026 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26028 unsigned long regno;
26029 rtx par[10], tmp, reg, insn;
26030 int i, j;
26032 /* Build the parallel of the registers actually being stored. */
26033 for (i = 0; mask; ++i, mask &= mask - 1)
26035 regno = ctz_hwi (mask);
26036 reg = gen_rtx_REG (SImode, regno);
26038 if (i == 0)
26039 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26040 else
26041 tmp = gen_rtx_USE (VOIDmode, reg);
26043 par[i] = tmp;
26046 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26047 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26048 tmp = gen_frame_mem (BLKmode, tmp);
26049 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26050 par[0] = tmp;
26052 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26053 insn = emit_insn (tmp);
26055 /* Always build the stack adjustment note for unwind info. */
26056 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26057 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26058 par[0] = tmp;
26060 /* Build the parallel of the registers recorded as saved for unwind. */
26061 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26063 regno = ctz_hwi (real_regs);
26064 reg = gen_rtx_REG (SImode, regno);
26066 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26067 tmp = gen_frame_mem (SImode, tmp);
26068 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26069 RTX_FRAME_RELATED_P (tmp) = 1;
26070 par[j + 1] = tmp;
26073 if (j == 0)
26074 tmp = par[0];
26075 else
26077 RTX_FRAME_RELATED_P (par[0]) = 1;
26078 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26081 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26083 return insn;
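/* Editor's note, an illustrative case: for MASK == (1 << 4 | 1 << 5 | 1 << 14)
   a single "push {r4, r5, lr}" is emitted, and the attached
   REG_FRAME_RELATED_EXPR note records the sp -= 12 adjustment plus one
   store per bit in REAL_REGS, so the unwinder sees the registers named
   by REAL_REGS even when the values actually pushed came from other
   work registers.  */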
26086 /* Emit code to push or pop registers to or from the stack. F is the
26087 assembly file. MASK is the registers to pop. */
26088 static void
26089 thumb_pop (FILE *f, unsigned long mask)
26091 int regno;
26092 int lo_mask = mask & 0xFF;
26093 int pushed_words = 0;
26095 gcc_assert (mask);
26097 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26099 /* Special case.  Do not generate a POP PC statement here; do it in
26100 thumb_exit ().  */
26101 thumb_exit (f, -1);
26102 return;
26105 fprintf (f, "\tpop\t{");
26107 /* Look at the low registers first. */
26108 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26110 if (lo_mask & 1)
26112 asm_fprintf (f, "%r", regno);
26114 if ((lo_mask & ~1) != 0)
26115 fprintf (f, ", ");
26117 pushed_words++;
26121 if (mask & (1 << PC_REGNUM))
26123 /* Catch popping the PC. */
26124 if (TARGET_INTERWORK || TARGET_BACKTRACE
26125 || crtl->calls_eh_return)
26127 /* The PC is never popped directly; instead
26128 it is popped into r3 and then BX is used. */
26129 fprintf (f, "}\n");
26131 thumb_exit (f, -1);
26133 return;
26135 else
26137 if (mask & 0xFF)
26138 fprintf (f, ", ");
26140 asm_fprintf (f, "%r", PC_REGNUM);
26144 fprintf (f, "}\n");
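/* Editor's note, an illustrative case: with MASK == (1 << 4 | 1 << 6) the
   loop above emits

     pop	{r4, r6}

   whereas a MASK including PC_REGNUM either appends pc to the register
   list or, when interworking, backtrace or an EH return is involved,
   defers to thumb_exit so the return is done with a BX.  */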
26147 /* Generate code to return from a thumb function.
26148 If 'reg_containing_return_addr' is -1, then the return address is
26149 actually on the stack, at the stack pointer. */
26150 static void
26151 thumb_exit (FILE *f, int reg_containing_return_addr)
26153 unsigned regs_available_for_popping;
26154 unsigned regs_to_pop;
26155 int pops_needed;
26156 unsigned available;
26157 unsigned required;
26158 enum machine_mode mode;
26159 int size;
26160 int restore_a4 = FALSE;
26162 /* Compute the registers we need to pop. */
26163 regs_to_pop = 0;
26164 pops_needed = 0;
26166 if (reg_containing_return_addr == -1)
26168 regs_to_pop |= 1 << LR_REGNUM;
26169 ++pops_needed;
26172 if (TARGET_BACKTRACE)
26174 /* Restore the (ARM) frame pointer and stack pointer. */
26175 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26176 pops_needed += 2;
26179 /* If there is nothing to pop then just emit the BX instruction and
26180 return. */
26181 if (pops_needed == 0)
26183 if (crtl->calls_eh_return)
26184 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26186 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26187 return;
26189 /* Otherwise, if we are not supporting interworking, have not created
26190 a backtrace structure, and the function was not entered in ARM mode,
26191 just pop the return address straight into the PC.  */
26192 else if (!TARGET_INTERWORK
26193 && !TARGET_BACKTRACE
26194 && !is_called_in_ARM_mode (current_function_decl)
26195 && !crtl->calls_eh_return)
26197 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26198 return;
26201 /* Find out how many of the (return) argument registers we can corrupt. */
26202 regs_available_for_popping = 0;
26204 /* If returning via __builtin_eh_return, the bottom three registers
26205 all contain information needed for the return. */
26206 if (crtl->calls_eh_return)
26207 size = 12;
26208 else
26210 /* We can deduce the registers used from the function's
26211 return value.  This is more reliable than examining
26212 df_regs_ever_live_p () because that will be set if the register is
26213 ever used in the function, not just if the register is used
26214 to hold a return value. */
26216 if (crtl->return_rtx != 0)
26217 mode = GET_MODE (crtl->return_rtx);
26218 else
26219 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26221 size = GET_MODE_SIZE (mode);
26223 if (size == 0)
26225 /* In a void function we can use any argument register.
26226 In a function that returns a structure on the stack
26227 we can use the second and third argument registers. */
26228 if (mode == VOIDmode)
26229 regs_available_for_popping =
26230 (1 << ARG_REGISTER (1))
26231 | (1 << ARG_REGISTER (2))
26232 | (1 << ARG_REGISTER (3));
26233 else
26234 regs_available_for_popping =
26235 (1 << ARG_REGISTER (2))
26236 | (1 << ARG_REGISTER (3));
26238 else if (size <= 4)
26239 regs_available_for_popping =
26240 (1 << ARG_REGISTER (2))
26241 | (1 << ARG_REGISTER (3));
26242 else if (size <= 8)
26243 regs_available_for_popping =
26244 (1 << ARG_REGISTER (3));
26247 /* Match registers to be popped with registers into which we pop them. */
26248 for (available = regs_available_for_popping,
26249 required = regs_to_pop;
26250 required != 0 && available != 0;
26251 available &= ~(available & - available),
26252 required &= ~(required & - required))
26253 -- pops_needed;
26255 /* If we have any popping registers left over, remove them. */
26256 if (available > 0)
26257 regs_available_for_popping &= ~available;
26259 /* Otherwise if we need another popping register we can use
26260 the fourth argument register. */
26261 else if (pops_needed)
26263 /* If we have not found any free argument registers and
26264 reg a4 contains the return address, we must move it. */
26265 if (regs_available_for_popping == 0
26266 && reg_containing_return_addr == LAST_ARG_REGNUM)
26268 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26269 reg_containing_return_addr = LR_REGNUM;
26271 else if (size > 12)
26273 /* Register a4 is being used to hold part of the return value,
26274 but we have dire need of a free, low register. */
26275 restore_a4 = TRUE;
26277 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26280 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26282 /* The fourth argument register is available. */
26283 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26285 --pops_needed;
26289 /* Pop as many registers as we can. */
26290 thumb_pop (f, regs_available_for_popping);
26292 /* Process the registers we popped. */
26293 if (reg_containing_return_addr == -1)
26295 /* The return address was popped into the lowest numbered register. */
26296 regs_to_pop &= ~(1 << LR_REGNUM);
26298 reg_containing_return_addr =
26299 number_of_first_bit_set (regs_available_for_popping);
26301 /* Remove this register from the mask of available registers, so that
26302 the return address will not be corrupted by further pops. */
26303 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26306 /* If we popped other registers then handle them here. */
26307 if (regs_available_for_popping)
26309 int frame_pointer;
26311 /* Work out which register currently contains the frame pointer. */
26312 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26314 /* Move it into the correct place. */
26315 asm_fprintf (f, "\tmov\t%r, %r\n",
26316 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26318 /* (Temporarily) remove it from the mask of popped registers. */
26319 regs_available_for_popping &= ~(1 << frame_pointer);
26320 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26322 if (regs_available_for_popping)
26324 int stack_pointer;
26326 /* We popped the stack pointer as well;
26327 find the register that contains it. */
26328 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26330 /* Move it into the stack register. */
26331 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26333 /* At this point we have popped all necessary registers, so
26334 do not worry about restoring regs_available_for_popping
26335 to its correct value:
26337 assert (pops_needed == 0)
26338 assert (regs_available_for_popping == (1 << frame_pointer))
26339 assert (regs_to_pop == (1 << STACK_POINTER)) */
26341 else
26343 /* Since we have just moved the popped value into the frame
26344 pointer, the popping register is available for reuse, and
26345 we know that we still have the stack pointer left to pop. */
26346 regs_available_for_popping |= (1 << frame_pointer);
26350 /* If we still have registers left on the stack, but we no longer have
26351 any registers into which we can pop them, then we must move the return
26352 address into the link register and make available the register that
26353 contained it. */
26354 if (regs_available_for_popping == 0 && pops_needed > 0)
26356 regs_available_for_popping |= 1 << reg_containing_return_addr;
26358 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26359 reg_containing_return_addr);
26361 reg_containing_return_addr = LR_REGNUM;
26364 /* If we have registers left on the stack then pop some more.
26365 We know that at most we will want to pop FP and SP. */
26366 if (pops_needed > 0)
26368 int popped_into;
26369 int move_to;
26371 thumb_pop (f, regs_available_for_popping);
26373 /* We have popped either FP or SP.
26374 Move whichever one it is into the correct register. */
26375 popped_into = number_of_first_bit_set (regs_available_for_popping);
26376 move_to = number_of_first_bit_set (regs_to_pop);
26378 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26380 regs_to_pop &= ~(1 << move_to);
26382 --pops_needed;
26385 /* If we still have not popped everything then we must have only
26386 had one register available to us and we are now popping the SP. */
26387 if (pops_needed > 0)
26389 int popped_into;
26391 thumb_pop (f, regs_available_for_popping);
26393 popped_into = number_of_first_bit_set (regs_available_for_popping);
26395 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26397 assert (regs_to_pop == (1 << STACK_POINTER))
26398 assert (pops_needed == 1)
26402 /* If necessary restore the a4 register. */
26403 if (restore_a4)
26405 if (reg_containing_return_addr != LR_REGNUM)
26407 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26408 reg_containing_return_addr = LR_REGNUM;
26411 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26414 if (crtl->calls_eh_return)
26415 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26417 /* Return to caller. */
26418 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26421 /* Scan INSN just before assembler is output for it.
26422 For Thumb-1, we track the status of the condition codes; this
26423 information is used in the cbranchsi4_insn pattern. */
26424 void
26425 thumb1_final_prescan_insn (rtx insn)
26427 if (flag_print_asm_name)
26428 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26429 INSN_ADDRESSES (INSN_UID (insn)));
26430 /* Don't overwrite the previous setter when we get to a cbranch. */
26431 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26433 enum attr_conds conds;
26435 if (cfun->machine->thumb1_cc_insn)
26437 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26438 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26439 CC_STATUS_INIT;
26441 conds = get_attr_conds (insn);
26442 if (conds == CONDS_SET)
26444 rtx set = single_set (insn);
26445 cfun->machine->thumb1_cc_insn = insn;
26446 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26447 cfun->machine->thumb1_cc_op1 = const0_rtx;
26448 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26449 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26451 rtx src1 = XEXP (SET_SRC (set), 1);
26452 if (src1 == const0_rtx)
26453 cfun->machine->thumb1_cc_mode = CCmode;
26455 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26457 /* Record the src register operand instead of dest because
26458 the cprop_hardreg pass propagates src.  */
26459 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26462 else if (conds != CONDS_NOCOND)
26463 cfun->machine->thumb1_cc_insn = NULL_RTX;
26466 /* Check if an unexpected far jump is used.  */
26467 if (cfun->machine->lr_save_eliminated
26468 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26469 internal_error("Unexpected thumb1 far jump");
26473 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26475 unsigned HOST_WIDE_INT mask = 0xff;
26476 int i;
26478 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26479 if (val == 0) /* XXX */
26480 return 0;
26482 for (i = 0; i < 25; i++)
26483 if ((val & (mask << i)) == val)
26484 return 1;
26486 return 0;
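/* For example, 0x00ff0000 and 0xff000000 are accepted (one byte's worth
   of set bits, shifted), while 0x00010100 is rejected because its set
   bits do not fit in any single 8-bit window.  */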
26489 /* Returns nonzero if the current function contains,
26490 or might contain, a far jump.  */
26491 static int
26492 thumb_far_jump_used_p (void)
26494 rtx insn;
26495 bool far_jump = false;
26496 unsigned int func_size = 0;
26498 /* This test is only important for leaf functions. */
26499 /* assert (!leaf_function_p ()); */
26501 /* If we have already decided that far jumps may be used,
26502 do not bother checking again, and always return true even if
26503 it turns out that they are not being used. Once we have made
26504 the decision that far jumps are present (and that hence the link
26505 register will be pushed onto the stack) we cannot go back on it. */
26506 if (cfun->machine->far_jump_used)
26507 return 1;
26509 /* If this function is not being called from the prologue/epilogue
26510 generation code then it must be being called from the
26511 INITIAL_ELIMINATION_OFFSET macro. */
26512 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26514 /* In this case we know that we are being asked about the elimination
26515 of the arg pointer register. If that register is not being used,
26516 then there are no arguments on the stack, and we do not have to
26517 worry that a far jump might force the prologue to push the link
26518 register, changing the stack offsets. In this case we can just
26519 return false, since the presence of far jumps in the function will
26520 not affect stack offsets.
26522 If the arg pointer is live (or if it was live, but has now been
26523 eliminated and so set to dead) then we do have to test to see if
26524 the function might contain a far jump. This test can lead to some
26525 false negatives, since before reload is completed, the length of
26526 branch instructions is not known, so gcc defaults to returning their
26527 longest length, which in turn sets the far jump attribute to true.
26529 A false negative will not result in bad code being generated, but it
26530 will result in a needless push and pop of the link register. We
26531 hope that this does not occur too often.
26533 If we need doubleword stack alignment this could affect the other
26534 elimination offsets so we can't risk getting it wrong. */
26535 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26536 cfun->machine->arg_pointer_live = 1;
26537 else if (!cfun->machine->arg_pointer_live)
26538 return 0;
26541 /* We should not change far_jump_used during or after reload, as there is
26542 no chance to change stack frame layout. */
26543 if (reload_in_progress || reload_completed)
26544 return 0;
26546 /* Check to see if the function contains a branch
26547 insn with the far jump attribute set. */
26548 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26550 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26552 far_jump = true;
26554 func_size += get_attr_length (insn);
26557 /* The far_jump attribute will always be true for thumb1 before the
26558 shorten_branch pass, so checking the far_jump attribute before
26559 shorten_branch is not very useful.
26561 The following heuristic tries to estimate more accurately whether a far
26562 jump may eventually be used.  The heuristic is very conservative, as
26563 there is no chance to roll back the decision not to use a far jump.
26565 The Thumb1 long branch offset is -2048 to 2046.  The worst case is that
26566 each 2-byte insn is associated with a 4-byte constant pool.  Using
26567 function size 2048/3 as the threshold is conservative enough. */
26568 if (far_jump)
26570 if ((func_size * 3) >= 2048)
26572 /* Record the fact that we have decided that
26573 the function does use far jumps. */
26574 cfun->machine->far_jump_used = 1;
26575 return 1;
26579 return 0;
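/* Editor's note, a worked example of the heuristic above: a function
   containing a branch with the far_jump attribute and a summed insn
   length of 700 bytes gives 700 * 3 = 2100 >= 2048, so far_jump_used is
   latched and LR will be saved; at 600 bytes (600 * 3 = 1800) the
   function is assumed to stay within short-branch range.  */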
26582 /* Return nonzero if FUNC must be entered in ARM mode. */
26584 is_called_in_ARM_mode (tree func)
26586 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26588 /* Ignore the problem of functions whose address is taken.  */
26589 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26590 return TRUE;
26592 #ifdef ARM_PE
26593 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26594 #else
26595 return FALSE;
26596 #endif
26599 /* Given the stack offsets and register mask in OFFSETS, decide how
26600 many additional registers to push instead of subtracting a constant
26601 from SP. For epilogues the principle is the same except we use pop.
26602 FOR_PROLOGUE indicates which we're generating. */
26603 static int
26604 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26606 HOST_WIDE_INT amount;
26607 unsigned long live_regs_mask = offsets->saved_regs_mask;
26608 /* Extract a mask of the ones we can give to the Thumb's push/pop
26609 instruction. */
26610 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26611 /* Then count how many other high registers will need to be pushed. */
26612 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26613 int n_free, reg_base, size;
26615 if (!for_prologue && frame_pointer_needed)
26616 amount = offsets->locals_base - offsets->saved_regs;
26617 else
26618 amount = offsets->outgoing_args - offsets->saved_regs;
26620 /* If the stack frame size is 512 exactly, we can save one load
26621 instruction, which should make this a win even when optimizing
26622 for speed. */
26623 if (!optimize_size && amount != 512)
26624 return 0;
26626 /* Can't do this if there are high registers to push. */
26627 if (high_regs_pushed != 0)
26628 return 0;
26630 /* Shouldn't do it in the prologue if no registers would normally
26631 be pushed at all. In the epilogue, also allow it if we'll have
26632 a pop insn for the PC. */
26633 if (l_mask == 0
26634 && (for_prologue
26635 || TARGET_BACKTRACE
26636 || (live_regs_mask & 1 << LR_REGNUM) == 0
26637 || TARGET_INTERWORK
26638 || crtl->args.pretend_args_size != 0))
26639 return 0;
26641 /* Don't do this if thumb_expand_prologue wants to emit instructions
26642 between the push and the stack frame allocation. */
26643 if (for_prologue
26644 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26645 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26646 return 0;
26648 reg_base = 0;
26649 n_free = 0;
26650 if (!for_prologue)
26652 size = arm_size_return_regs ();
26653 reg_base = ARM_NUM_INTS (size);
26654 live_regs_mask >>= reg_base;
26657 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26658 && (for_prologue || call_used_regs[reg_base + n_free]))
26660 live_regs_mask >>= 1;
26661 n_free++;
26664 if (n_free == 0)
26665 return 0;
26666 gcc_assert (amount / 4 * 4 == amount);
26668 if (amount >= 512 && (amount - n_free * 4) < 512)
26669 return (amount - 508) / 4;
26670 if (amount <= n_free * 4)
26671 return amount / 4;
26672 return 0;
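/* Editor's note, a worked example: with AMOUNT == 516 and two free low
   registers, 516 - 2 * 4 = 508 < 512, so the function returns
   (516 - 508) / 4 = 2: pushing two extra registers brings the remaining
   adjustment back within the single-instruction range.  With AMOUNT == 8
   and two free registers it returns 8 / 4 = 2 and no explicit stack
   adjustment is needed at all.  */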
26675 /* The bits which aren't usefully expanded as rtl. */
26676 const char *
26677 thumb1_unexpanded_epilogue (void)
26679 arm_stack_offsets *offsets;
26680 int regno;
26681 unsigned long live_regs_mask = 0;
26682 int high_regs_pushed = 0;
26683 int extra_pop;
26684 int had_to_push_lr;
26685 int size;
26687 if (cfun->machine->return_used_this_function != 0)
26688 return "";
26690 if (IS_NAKED (arm_current_func_type ()))
26691 return "";
26693 offsets = arm_get_frame_offsets ();
26694 live_regs_mask = offsets->saved_regs_mask;
26695 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26697 /* We can deduce the registers used from the function's return value.
26698 This is more reliable than examining df_regs_ever_live_p () because that
26699 will be set if the register is ever used in the function, not just if
26700 the register is used to hold a return value. */
26701 size = arm_size_return_regs ();
26703 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26704 if (extra_pop > 0)
26706 unsigned long extra_mask = (1 << extra_pop) - 1;
26707 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26710 /* The prologue may have pushed some high registers to use as
26711 work registers.  E.g. the testsuite file:
26712 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26713 compiles to produce:
26714 push {r4, r5, r6, r7, lr}
26715 mov r7, r9
26716 mov r6, r8
26717 push {r6, r7}
26718 as part of the prologue.  We have to undo that pushing here. */
26720 if (high_regs_pushed)
26722 unsigned long mask = live_regs_mask & 0xff;
26723 int next_hi_reg;
26725 /* The available low registers depend on the size of the value we are
26726 returning. */
26727 if (size <= 12)
26728 mask |= 1 << 3;
26729 if (size <= 8)
26730 mask |= 1 << 2;
26732 if (mask == 0)
26733 /* Oh dear! We have no low registers into which we can pop
26734 high registers! */
26735 internal_error
26736 ("no low registers available for popping high registers");
26738 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26739 if (live_regs_mask & (1 << next_hi_reg))
26740 break;
26742 while (high_regs_pushed)
26744 /* Find lo register(s) into which the high register(s) can
26745 be popped. */
26746 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26748 if (mask & (1 << regno))
26749 high_regs_pushed--;
26750 if (high_regs_pushed == 0)
26751 break;
26754 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26756 /* Pop the values into the low register(s). */
26757 thumb_pop (asm_out_file, mask);
26759 /* Move the value(s) into the high registers. */
26760 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26762 if (mask & (1 << regno))
26764 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26765 regno);
26767 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26768 if (live_regs_mask & (1 << next_hi_reg))
26769 break;
26773 live_regs_mask &= ~0x0f00;
26776 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26777 live_regs_mask &= 0xff;
26779 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26781 /* Pop the return address into the PC. */
26782 if (had_to_push_lr)
26783 live_regs_mask |= 1 << PC_REGNUM;
26785 /* Either no argument registers were pushed or a backtrace
26786 structure was created which includes an adjusted stack
26787 pointer, so just pop everything. */
26788 if (live_regs_mask)
26789 thumb_pop (asm_out_file, live_regs_mask);
26791 /* We have either just popped the return address into the
26792 PC or it was kept in LR for the entire function.
26793 Note that thumb_pop has already called thumb_exit if the
26794 PC was in the list. */
26795 if (!had_to_push_lr)
26796 thumb_exit (asm_out_file, LR_REGNUM);
26798 else
26800 /* Pop everything but the return address. */
26801 if (live_regs_mask)
26802 thumb_pop (asm_out_file, live_regs_mask);
26804 if (had_to_push_lr)
26806 if (size > 12)
26808 /* We have no free low regs, so save one. */
26809 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26810 LAST_ARG_REGNUM);
26813 /* Get the return address into a temporary register. */
26814 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26816 if (size > 12)
26818 /* Move the return address to lr. */
26819 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26820 LAST_ARG_REGNUM);
26821 /* Restore the low register. */
26822 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26823 IP_REGNUM);
26824 regno = LR_REGNUM;
26826 else
26827 regno = LAST_ARG_REGNUM;
26829 else
26830 regno = LR_REGNUM;
26832 /* Remove the argument registers that were pushed onto the stack. */
26833 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26834 SP_REGNUM, SP_REGNUM,
26835 crtl->args.pretend_args_size);
26837 thumb_exit (asm_out_file, regno);
26840 return "";
26843 /* Functions to save and restore machine-specific function data. */
26844 static struct machine_function *
26845 arm_init_machine_status (void)
26847 struct machine_function *machine;
26848 machine = ggc_cleared_alloc<machine_function> ();
26850 #if ARM_FT_UNKNOWN != 0
26851 machine->func_type = ARM_FT_UNKNOWN;
26852 #endif
26853 return machine;
26856 /* Return an RTX indicating where the return address to the
26857 calling function can be found. */
26859 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26861 if (count != 0)
26862 return NULL_RTX;
26864 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26867 /* Do anything needed before RTL is emitted for each function. */
26868 void
26869 arm_init_expanders (void)
26871 /* Arrange to initialize and mark the machine per-function status. */
26872 init_machine_status = arm_init_machine_status;
26874 /* This is to stop the combine pass optimizing away the alignment
26875 adjustment of va_arg. */
26876 /* ??? It is claimed that this should not be necessary. */
26877 if (cfun)
26878 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26882 /* Like arm_compute_initial_elimination_offset. Simpler because there
26883 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26884 to point at the base of the local variables after static stack
26885 space for a function has been allocated. */
26887 HOST_WIDE_INT
26888 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26890 arm_stack_offsets *offsets;
26892 offsets = arm_get_frame_offsets ();
26894 switch (from)
26896 case ARG_POINTER_REGNUM:
26897 switch (to)
26899 case STACK_POINTER_REGNUM:
26900 return offsets->outgoing_args - offsets->saved_args;
26902 case FRAME_POINTER_REGNUM:
26903 return offsets->soft_frame - offsets->saved_args;
26905 case ARM_HARD_FRAME_POINTER_REGNUM:
26906 return offsets->saved_regs - offsets->saved_args;
26908 case THUMB_HARD_FRAME_POINTER_REGNUM:
26909 return offsets->locals_base - offsets->saved_args;
26911 default:
26912 gcc_unreachable ();
26914 break;
26916 case FRAME_POINTER_REGNUM:
26917 switch (to)
26919 case STACK_POINTER_REGNUM:
26920 return offsets->outgoing_args - offsets->soft_frame;
26922 case ARM_HARD_FRAME_POINTER_REGNUM:
26923 return offsets->saved_regs - offsets->soft_frame;
26925 case THUMB_HARD_FRAME_POINTER_REGNUM:
26926 return offsets->locals_base - offsets->soft_frame;
26928 default:
26929 gcc_unreachable ();
26931 break;
26933 default:
26934 gcc_unreachable ();
26938 /* Generate the function's prologue. */
26940 void
26941 thumb1_expand_prologue (void)
26943 rtx insn;
26945 HOST_WIDE_INT amount;
26946 arm_stack_offsets *offsets;
26947 unsigned long func_type;
26948 int regno;
26949 unsigned long live_regs_mask;
26950 unsigned long l_mask;
26951 unsigned high_regs_pushed = 0;
26953 func_type = arm_current_func_type ();
26955 /* Naked functions don't have prologues. */
26956 if (IS_NAKED (func_type))
26957 return;
26959 if (IS_INTERRUPT (func_type))
26961 error ("interrupt Service Routines cannot be coded in Thumb mode");
26962 return;
26965 if (is_called_in_ARM_mode (current_function_decl))
26966 emit_insn (gen_prologue_thumb1_interwork ());
26968 offsets = arm_get_frame_offsets ();
26969 live_regs_mask = offsets->saved_regs_mask;
26971 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26972 l_mask = live_regs_mask & 0x40ff;
26973 /* Then count how many other high registers will need to be pushed. */
26974 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26976 if (crtl->args.pretend_args_size)
26978 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26980 if (cfun->machine->uses_anonymous_args)
26982 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26983 unsigned long mask;
26985 mask = 1ul << (LAST_ARG_REGNUM + 1);
26986 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26988 insn = thumb1_emit_multi_reg_push (mask, 0);
26990 else
26992 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26993 stack_pointer_rtx, x));
26995 RTX_FRAME_RELATED_P (insn) = 1;
26998 if (TARGET_BACKTRACE)
27000 HOST_WIDE_INT offset = 0;
27001 unsigned work_register;
27002 rtx work_reg, x, arm_hfp_rtx;
27004 /* We have been asked to create a stack backtrace structure.
27005 The code looks like this:
27007 0 .align 2
27008 0 func:
27009 0 sub SP, #16 Reserve space for 4 registers.
27010 2 push {R7} Push low registers.
27011 4 add R7, SP, #20 Get the stack pointer before the push.
27012 6 str R7, [SP, #8] Store the stack pointer
27013 (before reserving the space).
27014 8 mov R7, PC Get hold of the start of this code + 12.
27015 10 str R7, [SP, #16] Store it.
27016 12 mov R7, FP Get hold of the current frame pointer.
27017 14 str R7, [SP, #4] Store it.
27018 16 mov R7, LR Get hold of the current return address.
27019 18 str R7, [SP, #12] Store it.
27020 20 add R7, SP, #16 Point at the start of the
27021 backtrace structure.
27022 22 mov FP, R7 Put this value into the frame pointer. */
27024 work_register = thumb_find_work_register (live_regs_mask);
27025 work_reg = gen_rtx_REG (SImode, work_register);
27026 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27028 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27029 stack_pointer_rtx, GEN_INT (-16)));
27030 RTX_FRAME_RELATED_P (insn) = 1;
27032 if (l_mask)
27034 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27035 RTX_FRAME_RELATED_P (insn) = 1;
27037 offset = bit_count (l_mask) * UNITS_PER_WORD;
27040 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27041 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27043 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27044 x = gen_frame_mem (SImode, x);
27045 emit_move_insn (x, work_reg);
27047 /* Make sure that the instruction fetching the PC is in the right place
27048 to calculate "start of backtrace creation code + 12". */
27049 /* ??? The stores using the common WORK_REG ought to be enough to
27050 prevent the scheduler from doing anything weird. Failing that
27051 we could always move all of the following into an UNSPEC_VOLATILE. */
27052 if (l_mask)
27054 x = gen_rtx_REG (SImode, PC_REGNUM);
27055 emit_move_insn (work_reg, x);
27057 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27058 x = gen_frame_mem (SImode, x);
27059 emit_move_insn (x, work_reg);
27061 emit_move_insn (work_reg, arm_hfp_rtx);
27063 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27064 x = gen_frame_mem (SImode, x);
27065 emit_move_insn (x, work_reg);
27067 else
27069 emit_move_insn (work_reg, arm_hfp_rtx);
27071 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27072 x = gen_frame_mem (SImode, x);
27073 emit_move_insn (x, work_reg);
27075 x = gen_rtx_REG (SImode, PC_REGNUM);
27076 emit_move_insn (work_reg, x);
27078 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27079 x = gen_frame_mem (SImode, x);
27080 emit_move_insn (x, work_reg);
27083 x = gen_rtx_REG (SImode, LR_REGNUM);
27084 emit_move_insn (work_reg, x);
27086 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27087 x = gen_frame_mem (SImode, x);
27088 emit_move_insn (x, work_reg);
27090 x = GEN_INT (offset + 12);
27091 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27093 emit_move_insn (arm_hfp_rtx, work_reg);
27095 /* Optimization: If we are not pushing any low registers but we are going
27096 to push some high registers then delay our first push. This will just
27097 be a push of LR and we can combine it with the push of the first high
27098 register. */
27099 else if ((l_mask & 0xff) != 0
27100 || (high_regs_pushed == 0 && l_mask))
27102 unsigned long mask = l_mask;
27103 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27104 insn = thumb1_emit_multi_reg_push (mask, mask);
27105 RTX_FRAME_RELATED_P (insn) = 1;
27108 if (high_regs_pushed)
27110 unsigned pushable_regs;
27111 unsigned next_hi_reg;
27112 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27113 : crtl->args.info.nregs;
27114 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
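/* As a rough illustration (assuming the usual AAPCS assignment of the
   first arguments to r0-r3): for a function taking two integer
   arguments, arg_regs_num is 2 and arg_regs_mask is 0x3, so r0 and r1
   are removed from the set of candidate stash registers below.  */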
27116 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27117 if (live_regs_mask & (1 << next_hi_reg))
27118 break;
27120 /* Here we need to mask out registers used for passing arguments,
27121 even if they could otherwise be pushed. Using them to stash the high
27122 registers would clobber the argument values. */
27123 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27125 if (pushable_regs == 0)
27126 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27128 while (high_regs_pushed > 0)
27130 unsigned long real_regs_mask = 0;
27132 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27134 if (pushable_regs & (1 << regno))
27136 emit_move_insn (gen_rtx_REG (SImode, regno),
27137 gen_rtx_REG (SImode, next_hi_reg));
27139 high_regs_pushed --;
27140 real_regs_mask |= (1 << next_hi_reg);
27142 if (high_regs_pushed)
27144 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27145 next_hi_reg --)
27146 if (live_regs_mask & (1 << next_hi_reg))
27147 break;
27149 else
27151 pushable_regs &= ~((1 << regno) - 1);
27152 break;
27157 /* If we had to find a work register and we have not yet
27158 saved the LR then add it to the list of regs to push. */
27159 if (l_mask == (1 << LR_REGNUM))
27161 pushable_regs |= l_mask;
27162 real_regs_mask |= l_mask;
27163 l_mask = 0;
27166 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27167 RTX_FRAME_RELATED_P (insn) = 1;
27171 /* Load the pic register before setting the frame pointer,
27172 so we can use r7 as a temporary work register. */
27173 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27174 arm_load_pic_register (live_regs_mask);
27176 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27177 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27178 stack_pointer_rtx);
27180 if (flag_stack_usage_info)
27181 current_function_static_stack_size
27182 = offsets->outgoing_args - offsets->saved_args;
27184 amount = offsets->outgoing_args - offsets->saved_regs;
27185 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27186 if (amount)
27188 if (amount < 512)
27190 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27191 GEN_INT (- amount)));
27192 RTX_FRAME_RELATED_P (insn) = 1;
27194 else
27196 rtx reg, dwarf;
27198 /* The stack decrement is too big for an immediate value in a single
27199 insn. In theory we could issue multiple subtracts, but after
27200 three of them it becomes more space efficient to place the full
27201 value in the constant pool and load into a register. (Also the
27202 ARM debugger really likes to see only one stack decrement per
27203 function). So instead we look for a scratch register into which
27204 we can load the decrement, and then we subtract this from the
27205 stack pointer. Unfortunately on the thumb the only available
27206 scratch registers are the argument registers, and we cannot use
27207 these as they may hold arguments to the function. Instead we
27208 attempt to locate a call preserved register which is used by this
27209 function. If we can find one, then we know that it will have
27210 been pushed at the start of the prologue and so we can corrupt
27211 it now. */
27212 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27213 if (live_regs_mask & (1 << regno))
27214 break;
27216 gcc_assert(regno <= LAST_LO_REGNUM);
27218 reg = gen_rtx_REG (SImode, regno);
27220 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27222 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27223 stack_pointer_rtx, reg));
27225 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27226 plus_constant (Pmode, stack_pointer_rtx,
27227 -amount));
27228 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27229 RTX_FRAME_RELATED_P (insn) = 1;
27233 if (frame_pointer_needed)
27234 thumb_set_frame_pointer (offsets);
27236 /* If we are profiling, make sure no instructions are scheduled before
27237 the call to mcount. Similarly if the user has requested no
27238 scheduling in the prolog. Similarly if we want non-call exceptions
27239 using the EABI unwinder, to prevent faulting instructions from being
27240 swapped with a stack adjustment. */
27241 if (crtl->profile || !TARGET_SCHED_PROLOG
27242 || (arm_except_unwind_info (&global_options) == UI_TARGET
27243 && cfun->can_throw_non_call_exceptions))
27244 emit_insn (gen_blockage ());
27246 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27247 if (live_regs_mask & 0xff)
27248 cfun->machine->lr_save_eliminated = 0;
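/* As a minimal sketch (assuming a small frame, no high registers, no
   backtrace structure and no PIC setup), the RTL emitted above
   typically assembles to something like:
	push	{r4, r5, r7, lr}
	sub	sp, sp, #16
   with the frame-pointer copy added only when frame_pointer_needed.  */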
27251 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27252 POP instruction can be generated. LR should be replaced by PC. All
27253 the checks required are already done by USE_RETURN_INSN (). Hence,
27254 all we really need to check here is whether a single register or
27255 multiple registers are to be popped. */
27256 void
27257 thumb2_expand_return (bool simple_return)
27259 int i, num_regs;
27260 unsigned long saved_regs_mask;
27261 arm_stack_offsets *offsets;
27263 offsets = arm_get_frame_offsets ();
27264 saved_regs_mask = offsets->saved_regs_mask;
27266 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27267 if (saved_regs_mask & (1 << i))
27268 num_regs++;
27270 if (!simple_return && saved_regs_mask)
27272 if (num_regs == 1)
27274 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27275 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27276 rtx addr = gen_rtx_MEM (SImode,
27277 gen_rtx_POST_INC (SImode,
27278 stack_pointer_rtx));
27279 set_mem_alias_set (addr, get_frame_alias_set ());
27280 XVECEXP (par, 0, 0) = ret_rtx;
27281 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27282 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27283 emit_jump_insn (par);
27285 else
27287 saved_regs_mask &= ~ (1 << LR_REGNUM);
27288 saved_regs_mask |= (1 << PC_REGNUM);
27289 arm_emit_multi_reg_pop (saved_regs_mask);
27292 else
27294 emit_jump_insn (simple_return_rtx);
27298 void
27299 thumb1_expand_epilogue (void)
27301 HOST_WIDE_INT amount;
27302 arm_stack_offsets *offsets;
27303 int regno;
27305 /* Naked functions don't have epilogues. */
27306 if (IS_NAKED (arm_current_func_type ()))
27307 return;
27309 offsets = arm_get_frame_offsets ();
27310 amount = offsets->outgoing_args - offsets->saved_regs;
27312 if (frame_pointer_needed)
27314 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27315 amount = offsets->locals_base - offsets->saved_regs;
27317 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27319 gcc_assert (amount >= 0);
27320 if (amount)
27322 emit_insn (gen_blockage ());
27324 if (amount < 512)
27325 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27326 GEN_INT (amount)));
27327 else
27329 /* r3 is always free in the epilogue. */
27330 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27332 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27333 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27337 /* Emit a USE (stack_pointer_rtx), so that
27338 the stack adjustment will not be deleted. */
27339 emit_insn (gen_force_register_use (stack_pointer_rtx));
27341 if (crtl->profile || !TARGET_SCHED_PROLOG)
27342 emit_insn (gen_blockage ());
27344 /* Emit a clobber for each insn that will be restored in the epilogue,
27345 so that flow2 will get register lifetimes correct. */
27346 for (regno = 0; regno < 13; regno++)
27347 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27348 emit_clobber (gen_rtx_REG (SImode, regno));
27350 if (! df_regs_ever_live_p (LR_REGNUM))
27351 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
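/* A minimal sketch of the stack rewind this expands to, assuming a
   600-byte frame (too large for an immediate) and that r3 really is
   free here:
	ldr	r3, =600
	add	sp, sp, r3
   Frames under 512 bytes get a single add of an immediate instead; the
   register pops themselves are emitted separately when the epilogue is
   output.  */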
27354 /* Epilogue code for APCS frame. */
27355 static void
27356 arm_expand_epilogue_apcs_frame (bool really_return)
27358 unsigned long func_type;
27359 unsigned long saved_regs_mask;
27360 int num_regs = 0;
27361 int i;
27362 int floats_from_frame = 0;
27363 arm_stack_offsets *offsets;
27365 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27366 func_type = arm_current_func_type ();
27368 /* Get frame offsets for ARM. */
27369 offsets = arm_get_frame_offsets ();
27370 saved_regs_mask = offsets->saved_regs_mask;
27372 /* Find the offset of the floating-point save area in the frame. */
27373 floats_from_frame
27374 = (offsets->saved_args
27375 + arm_compute_static_chain_stack_bytes ()
27376 - offsets->frame);
27378 /* Compute how many core registers are saved and how far away the floats are. */
27379 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27380 if (saved_regs_mask & (1 << i))
27382 num_regs++;
27383 floats_from_frame += 4;
27386 if (TARGET_HARD_FLOAT && TARGET_VFP)
27388 int start_reg;
27389 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27391 /* The offset is from IP_REGNUM. */
27392 int saved_size = arm_get_vfp_saved_size ();
27393 if (saved_size > 0)
27395 rtx insn;
27396 floats_from_frame += saved_size;
27397 insn = emit_insn (gen_addsi3 (ip_rtx,
27398 hard_frame_pointer_rtx,
27399 GEN_INT (-floats_from_frame)));
27400 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27401 ip_rtx, hard_frame_pointer_rtx);
27404 /* Generate VFP register multi-pop. */
27405 start_reg = FIRST_VFP_REGNUM;
27407 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27408 /* Look for a case where a reg does not need restoring. */
27409 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27410 && (!df_regs_ever_live_p (i + 1)
27411 || call_used_regs[i + 1]))
27413 if (start_reg != i)
27414 arm_emit_vfp_multi_reg_pop (start_reg,
27415 (i - start_reg) / 2,
27416 gen_rtx_REG (SImode,
27417 IP_REGNUM));
27418 start_reg = i + 2;
27421 /* Restore the remaining regs that we have discovered (or possibly
27422 even all of them, if the conditional in the for loop never
27423 fired). */
27424 if (start_reg != i)
27425 arm_emit_vfp_multi_reg_pop (start_reg,
27426 (i - start_reg) / 2,
27427 gen_rtx_REG (SImode, IP_REGNUM));
27430 if (TARGET_IWMMXT)
27432 /* The frame pointer is guaranteed to be non-double-word aligned, as
27433 it is set to double-word-aligned old_stack_pointer - 4. */
27434 rtx insn;
27435 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27437 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27438 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27440 rtx addr = gen_frame_mem (V2SImode,
27441 plus_constant (Pmode, hard_frame_pointer_rtx,
27442 - lrm_count * 4));
27443 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27444 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27445 gen_rtx_REG (V2SImode, i),
27446 NULL_RTX);
27447 lrm_count += 2;
27451 /* saved_regs_mask should contain IP, which holds the old stack pointer
27452 saved when the activation record was created. Since SP and IP are adjacent registers,
27453 we can restore the value directly into SP. */
27454 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27455 saved_regs_mask &= ~(1 << IP_REGNUM);
27456 saved_regs_mask |= (1 << SP_REGNUM);
27458 /* There are two registers left in saved_regs_mask - LR and PC. We
27459 only need to restore LR (the return address), but to
27460 save time we can load it directly into PC, unless we need a
27461 special function exit sequence, or we are not really returning. */
27462 if (really_return
27463 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27464 && !crtl->calls_eh_return)
27465 /* Delete LR from the register mask, so that LR on
27466 the stack is loaded into the PC in the register mask. */
27467 saved_regs_mask &= ~(1 << LR_REGNUM);
27468 else
27469 saved_regs_mask &= ~(1 << PC_REGNUM);
27471 num_regs = bit_count (saved_regs_mask);
27472 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27474 rtx insn;
27475 emit_insn (gen_blockage ());
27476 /* Unwind the stack to just below the saved registers. */
27477 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27478 hard_frame_pointer_rtx,
27479 GEN_INT (- 4 * num_regs)));
27481 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27482 stack_pointer_rtx, hard_frame_pointer_rtx);
27485 arm_emit_multi_reg_pop (saved_regs_mask);
27487 if (IS_INTERRUPT (func_type))
27489 /* Interrupt handlers will have pushed the
27490 IP onto the stack, so restore it now. */
27491 rtx insn;
27492 rtx addr = gen_rtx_MEM (SImode,
27493 gen_rtx_POST_INC (SImode,
27494 stack_pointer_rtx));
27495 set_mem_alias_set (addr, get_frame_alias_set ());
27496 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27497 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27498 gen_rtx_REG (SImode, IP_REGNUM),
27499 NULL_RTX);
27502 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27503 return;
27505 if (crtl->calls_eh_return)
27506 emit_insn (gen_addsi3 (stack_pointer_rtx,
27507 stack_pointer_rtx,
27508 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27510 if (IS_STACKALIGN (func_type))
27511 /* Restore the original stack pointer. During the prologue, the stack was
27512 realigned and the original stack pointer was saved in r0. For details,
27513 see comment in arm_expand_prologue. */
27514 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27516 emit_jump_insn (simple_return_rtx);
27519 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27520 function is not a sibcall. */
27521 void
27522 arm_expand_epilogue (bool really_return)
27524 unsigned long func_type;
27525 unsigned long saved_regs_mask;
27526 int num_regs = 0;
27527 int i;
27528 int amount;
27529 arm_stack_offsets *offsets;
27531 func_type = arm_current_func_type ();
27533 /* Naked functions don't have epilogues. Hence, generate the return pattern and
27534 let output_return_instruction take care of any instruction emission. */
27535 if (IS_NAKED (func_type)
27536 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27538 if (really_return)
27539 emit_jump_insn (simple_return_rtx);
27540 return;
27543 /* If we are throwing an exception, then we really must be doing a
27544 return, so we can't tail-call. */
27545 gcc_assert (!crtl->calls_eh_return || really_return);
27547 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27549 arm_expand_epilogue_apcs_frame (really_return);
27550 return;
27553 /* Get frame offsets for ARM. */
27554 offsets = arm_get_frame_offsets ();
27555 saved_regs_mask = offsets->saved_regs_mask;
27556 num_regs = bit_count (saved_regs_mask);
27558 if (frame_pointer_needed)
27560 rtx insn;
27561 /* Restore stack pointer if necessary. */
27562 if (TARGET_ARM)
27564 /* In ARM mode, frame pointer points to first saved register.
27565 Restore stack pointer to last saved register. */
27566 amount = offsets->frame - offsets->saved_regs;
27568 /* Force out any pending memory operations that reference stacked data
27569 before stack de-allocation occurs. */
27570 emit_insn (gen_blockage ());
27571 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27572 hard_frame_pointer_rtx,
27573 GEN_INT (amount)));
27574 arm_add_cfa_adjust_cfa_note (insn, amount,
27575 stack_pointer_rtx,
27576 hard_frame_pointer_rtx);
27578 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27579 deleted. */
27580 emit_insn (gen_force_register_use (stack_pointer_rtx));
27582 else
27584 /* In Thumb-2 mode, the frame pointer points to the last saved
27585 register. */
27586 amount = offsets->locals_base - offsets->saved_regs;
27587 if (amount)
27589 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27590 hard_frame_pointer_rtx,
27591 GEN_INT (amount)));
27592 arm_add_cfa_adjust_cfa_note (insn, amount,
27593 hard_frame_pointer_rtx,
27594 hard_frame_pointer_rtx);
27597 /* Force out any pending memory operations that reference stacked data
27598 before stack de-allocation occurs. */
27599 emit_insn (gen_blockage ());
27600 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27601 hard_frame_pointer_rtx));
27602 arm_add_cfa_adjust_cfa_note (insn, 0,
27603 stack_pointer_rtx,
27604 hard_frame_pointer_rtx);
27605 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27606 deleted. */
27607 emit_insn (gen_force_register_use (stack_pointer_rtx));
27610 else
27612 /* Pop off outgoing args and local frame to adjust stack pointer to
27613 last saved register. */
27614 amount = offsets->outgoing_args - offsets->saved_regs;
27615 if (amount)
27617 rtx tmp;
27618 /* Force out any pending memory operations that reference stacked data
27619 before stack de-allocation occurs. */
27620 emit_insn (gen_blockage ());
27621 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27622 stack_pointer_rtx,
27623 GEN_INT (amount)));
27624 arm_add_cfa_adjust_cfa_note (tmp, amount,
27625 stack_pointer_rtx, stack_pointer_rtx);
27626 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27627 not deleted. */
27628 emit_insn (gen_force_register_use (stack_pointer_rtx));
27632 if (TARGET_HARD_FLOAT && TARGET_VFP)
27634 /* Generate VFP register multi-pop. */
27635 int end_reg = LAST_VFP_REGNUM + 1;
27637 /* Scan the registers in reverse order. We need to match
27638 any groupings made in the prologue and generate matching
27639 vldm operations. The need to match groups is because,
27640 unlike pop, vldm can only do consecutive regs. */
27641 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27642 /* Look for a case where a reg does not need restoring. */
27643 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27644 && (!df_regs_ever_live_p (i + 1)
27645 || call_used_regs[i + 1]))
27647 /* Restore the regs discovered so far (from reg+2 to
27648 end_reg). */
27649 if (end_reg > i + 2)
27650 arm_emit_vfp_multi_reg_pop (i + 2,
27651 (end_reg - (i + 2)) / 2,
27652 stack_pointer_rtx);
27653 end_reg = i;
27656 /* Restore the remaining regs that we have discovered (or possibly
27657 even all of them, if the conditional in the for loop never
27658 fired). */
27659 if (end_reg > i + 2)
27660 arm_emit_vfp_multi_reg_pop (i + 2,
27661 (end_reg - (i + 2)) / 2,
27662 stack_pointer_rtx);
27665 if (TARGET_IWMMXT)
27666 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27667 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27669 rtx insn;
27670 rtx addr = gen_rtx_MEM (V2SImode,
27671 gen_rtx_POST_INC (SImode,
27672 stack_pointer_rtx));
27673 set_mem_alias_set (addr, get_frame_alias_set ());
27674 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27675 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27676 gen_rtx_REG (V2SImode, i),
27677 NULL_RTX);
27678 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27679 stack_pointer_rtx, stack_pointer_rtx);
27682 if (saved_regs_mask)
27684 rtx insn;
27685 bool return_in_pc = false;
27687 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27688 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27689 && !IS_STACKALIGN (func_type)
27690 && really_return
27691 && crtl->args.pretend_args_size == 0
27692 && saved_regs_mask & (1 << LR_REGNUM)
27693 && !crtl->calls_eh_return)
27695 saved_regs_mask &= ~(1 << LR_REGNUM);
27696 saved_regs_mask |= (1 << PC_REGNUM);
27697 return_in_pc = true;
27700 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27702 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27703 if (saved_regs_mask & (1 << i))
27705 rtx addr = gen_rtx_MEM (SImode,
27706 gen_rtx_POST_INC (SImode,
27707 stack_pointer_rtx));
27708 set_mem_alias_set (addr, get_frame_alias_set ());
27710 if (i == PC_REGNUM)
27712 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27713 XVECEXP (insn, 0, 0) = ret_rtx;
27714 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27715 gen_rtx_REG (SImode, i),
27716 addr);
27717 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27718 insn = emit_jump_insn (insn);
27720 else
27722 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27723 addr));
27724 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27725 gen_rtx_REG (SImode, i),
27726 NULL_RTX);
27727 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27728 stack_pointer_rtx,
27729 stack_pointer_rtx);
27733 else
27735 if (TARGET_LDRD
27736 && current_tune->prefer_ldrd_strd
27737 && !optimize_function_for_size_p (cfun))
27739 if (TARGET_THUMB2)
27740 thumb2_emit_ldrd_pop (saved_regs_mask);
27741 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27742 arm_emit_ldrd_pop (saved_regs_mask);
27743 else
27744 arm_emit_multi_reg_pop (saved_regs_mask);
27746 else
27747 arm_emit_multi_reg_pop (saved_regs_mask);
27750 if (return_in_pc)
27751 return;
27754 if (crtl->args.pretend_args_size)
27756 int i, j;
27757 rtx dwarf = NULL_RTX;
27758 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27759 stack_pointer_rtx,
27760 GEN_INT (crtl->args.pretend_args_size)));
27762 RTX_FRAME_RELATED_P (tmp) = 1;
27764 if (cfun->machine->uses_anonymous_args)
27766 /* Restore pretend args. Refer to arm_expand_prologue for how
27767 pretend_args are saved on the stack. */
27768 int num_regs = crtl->args.pretend_args_size / 4;
27769 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27770 for (j = 0, i = 0; j < num_regs; i++)
27771 if (saved_regs_mask & (1 << i))
27773 rtx reg = gen_rtx_REG (SImode, i);
27774 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27775 j++;
27777 REG_NOTES (tmp) = dwarf;
27779 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27780 stack_pointer_rtx, stack_pointer_rtx);
27783 if (!really_return)
27784 return;
27786 if (crtl->calls_eh_return)
27787 emit_insn (gen_addsi3 (stack_pointer_rtx,
27788 stack_pointer_rtx,
27789 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27791 if (IS_STACKALIGN (func_type))
27792 /* Restore the original stack pointer. During the prologue, the stack was
27793 realigned and the original stack pointer was saved in r0. For details,
27794 see comment in arm_expand_prologue. */
27795 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27797 emit_jump_insn (simple_return_rtx);
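/* As an illustrative sketch: for a simple ARM-mode function with no
   frame pointer that saved {r4, r5, lr} and allocated 16 bytes of
   outgoing/local stack, the expansion above amounts to roughly
	add	sp, sp, #16
	pop	{r4, r5, pc}
   with LR's save slot reloaded straight into the PC because the
   return_in_pc conditions hold.  */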
27800 /* Implementation of insn prologue_thumb1_interwork. This is the first
27801 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27803 const char *
27804 thumb1_output_interwork (void)
27806 const char * name;
27807 FILE *f = asm_out_file;
27809 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27810 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27811 == SYMBOL_REF);
27812 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27814 /* Generate code sequence to switch us into Thumb mode. */
27815 /* The .code 32 directive has already been emitted by
27816 ASM_DECLARE_FUNCTION_NAME. */
27817 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27818 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27820 /* Generate a label, so that the debugger will notice the
27821 change in instruction sets. This label is also used by
27822 the assembler to bypass the ARM code when this function
27823 is called from a Thumb encoded function elsewhere in the
27824 same file. Hence the definition of STUB_NAME here must
27825 agree with the definition in gas/config/tc-arm.c. */
27827 #define STUB_NAME ".real_start_of"
27829 fprintf (f, "\t.code\t16\n");
27830 #ifdef ARM_PE
27831 if (arm_dllexport_name_p (name))
27832 name = arm_strip_name_encoding (name);
27833 #endif
27834 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27835 fprintf (f, "\t.thumb_func\n");
27836 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27838 return "";
27841 /* Handle the case of a double word load into a low register from
27842 a computed memory address. The computed address may involve a
27843 register which is overwritten by the load. */
27844 const char *
27845 thumb_load_double_from_address (rtx *operands)
27847 rtx addr;
27848 rtx base;
27849 rtx offset;
27850 rtx arg1;
27851 rtx arg2;
27853 gcc_assert (REG_P (operands[0]));
27854 gcc_assert (MEM_P (operands[1]));
27856 /* Get the memory address. */
27857 addr = XEXP (operands[1], 0);
27859 /* Work out how the memory address is computed. */
27860 switch (GET_CODE (addr))
27862 case REG:
27863 operands[2] = adjust_address (operands[1], SImode, 4);
27865 if (REGNO (operands[0]) == REGNO (addr))
27867 output_asm_insn ("ldr\t%H0, %2", operands);
27868 output_asm_insn ("ldr\t%0, %1", operands);
27870 else
27872 output_asm_insn ("ldr\t%0, %1", operands);
27873 output_asm_insn ("ldr\t%H0, %2", operands);
27875 break;
27877 case CONST:
27878 /* Compute <address> + 4 for the high order load. */
27879 operands[2] = adjust_address (operands[1], SImode, 4);
27881 output_asm_insn ("ldr\t%0, %1", operands);
27882 output_asm_insn ("ldr\t%H0, %2", operands);
27883 break;
27885 case PLUS:
27886 arg1 = XEXP (addr, 0);
27887 arg2 = XEXP (addr, 1);
27889 if (CONSTANT_P (arg1))
27890 base = arg2, offset = arg1;
27891 else
27892 base = arg1, offset = arg2;
27894 gcc_assert (REG_P (base));
27896 /* Catch the case of <address> = <reg> + <reg> */
27897 if (REG_P (offset))
27899 int reg_offset = REGNO (offset);
27900 int reg_base = REGNO (base);
27901 int reg_dest = REGNO (operands[0]);
27903 /* Add the base and offset registers together into the
27904 higher destination register. */
27905 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27906 reg_dest + 1, reg_base, reg_offset);
27908 /* Load the lower destination register from the address in
27909 the higher destination register. */
27910 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27911 reg_dest, reg_dest + 1);
27913 /* Load the higher destination register from its own address
27914 plus 4. */
27915 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27916 reg_dest + 1, reg_dest + 1);
27918 else
27920 /* Compute <address> + 4 for the high order load. */
27921 operands[2] = adjust_address (operands[1], SImode, 4);
27923 /* If the computed address is held in the low order register
27924 then load the high order register first, otherwise always
27925 load the low order register first. */
27926 if (REGNO (operands[0]) == REGNO (base))
27928 output_asm_insn ("ldr\t%H0, %2", operands);
27929 output_asm_insn ("ldr\t%0, %1", operands);
27931 else
27933 output_asm_insn ("ldr\t%0, %1", operands);
27934 output_asm_insn ("ldr\t%H0, %2", operands);
27937 break;
27939 case LABEL_REF:
27940 /* With no registers to worry about we can just load the value
27941 directly. */
27942 operands[2] = adjust_address (operands[1], SImode, 4);
27944 output_asm_insn ("ldr\t%H0, %2", operands);
27945 output_asm_insn ("ldr\t%0, %1", operands);
27946 break;
27948 default:
27949 gcc_unreachable ();
27952 return "";
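/* For instance, loading a doubleword at [r1, r2] into r4/r5 goes down
   the register-plus-register path above and emits
	add	r5, r1, r2
	ldr	r4, [r5, #0]
	ldr	r5, [r5, #4]
   The address is built in the high half of the destination so that the
   low-word load cannot clobber it before the high word is fetched.  */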
27955 const char *
27956 thumb_output_move_mem_multiple (int n, rtx *operands)
27958 rtx tmp;
27960 switch (n)
27962 case 2:
27963 if (REGNO (operands[4]) > REGNO (operands[5]))
27965 tmp = operands[4];
27966 operands[4] = operands[5];
27967 operands[5] = tmp;
27969 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27970 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27971 break;
27973 case 3:
27974 if (REGNO (operands[4]) > REGNO (operands[5]))
27976 tmp = operands[4];
27977 operands[4] = operands[5];
27978 operands[5] = tmp;
27980 if (REGNO (operands[5]) > REGNO (operands[6]))
27982 tmp = operands[5];
27983 operands[5] = operands[6];
27984 operands[6] = tmp;
27986 if (REGNO (operands[4]) > REGNO (operands[5]))
27988 tmp = operands[4];
27989 operands[4] = operands[5];
27990 operands[5] = tmp;
27993 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27994 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27995 break;
27997 default:
27998 gcc_unreachable ();
28001 return "";
28004 /* Output a call-via instruction for thumb state. */
28005 const char *
28006 thumb_call_via_reg (rtx reg)
28008 int regno = REGNO (reg);
28009 rtx *labelp;
28011 gcc_assert (regno < LR_REGNUM);
28013 /* If we are in the normal text section we can use a single instance
28014 per compilation unit. If we are doing function sections, then we need
28015 an entry per section, since we can't rely on reachability. */
28016 if (in_section == text_section)
28018 thumb_call_reg_needed = 1;
28020 if (thumb_call_via_label[regno] == NULL)
28021 thumb_call_via_label[regno] = gen_label_rtx ();
28022 labelp = thumb_call_via_label + regno;
28024 else
28026 if (cfun->machine->call_via[regno] == NULL)
28027 cfun->machine->call_via[regno] = gen_label_rtx ();
28028 labelp = cfun->machine->call_via + regno;
28031 output_asm_insn ("bl\t%a0", labelp);
28032 return "";
28035 /* Routines for generating rtl. */
28036 void
28037 thumb_expand_movmemqi (rtx *operands)
28039 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28040 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28041 HOST_WIDE_INT len = INTVAL (operands[2]);
28042 HOST_WIDE_INT offset = 0;
28044 while (len >= 12)
28046 emit_insn (gen_movmem12b (out, in, out, in));
28047 len -= 12;
28050 if (len >= 8)
28052 emit_insn (gen_movmem8b (out, in, out, in));
28053 len -= 8;
28056 if (len >= 4)
28058 rtx reg = gen_reg_rtx (SImode);
28059 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28060 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28061 len -= 4;
28062 offset += 4;
28065 if (len >= 2)
28067 rtx reg = gen_reg_rtx (HImode);
28068 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28069 plus_constant (Pmode, in,
28070 offset))));
28071 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28072 offset)),
28073 reg));
28074 len -= 2;
28075 offset += 2;
28078 if (len)
28080 rtx reg = gen_reg_rtx (QImode);
28081 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28082 plus_constant (Pmode, in,
28083 offset))));
28084 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28085 offset)),
28086 reg));
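/* Worked example: a 23-byte copy emits one 12-byte block move, one
   8-byte block move, then a halfword copy and a single byte copy for
   the remaining 3 bytes.  */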
28090 void
28091 thumb_reload_out_hi (rtx *operands)
28093 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28096 /* Handle reading a half-word from memory during reload. */
28097 void
28098 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28100 gcc_unreachable ();
28103 /* Return the length of a function name prefix
28104 that starts with the character 'c'. */
28105 static int
28106 arm_get_strip_length (int c)
28108 switch (c)
28110 ARM_NAME_ENCODING_LENGTHS
28111 default: return 0;
28115 /* Return a pointer to a function's name with any
28116 and all prefix encodings stripped from it. */
28117 const char *
28118 arm_strip_name_encoding (const char *name)
28120 int skip;
28122 while ((skip = arm_get_strip_length (* name)))
28123 name += skip;
28125 return name;
28128 /* If there is a '*' anywhere in the name's prefix, then
28129 emit the stripped name verbatim, otherwise prepend an
28130 underscore if leading underscores are being used. */
28131 void
28132 arm_asm_output_labelref (FILE *stream, const char *name)
28134 int skip;
28135 int verbatim = 0;
28137 while ((skip = arm_get_strip_length (* name)))
28139 verbatim |= (*name == '*');
28140 name += skip;
28143 if (verbatim)
28144 fputs (name, stream);
28145 else
28146 asm_fprintf (stream, "%U%s", name);
28149 /* This function is used to emit an EABI tag and its associated value.
28150 We emit the numerical value of the tag in case the assembler does not
28151 support textual tags (e.g. gas prior to 2.20). If requested we include
28152 the tag name in a comment so that anyone reading the assembler output
28153 will know which tag is being set.
28155 This function is not static because arm-c.c needs it too. */
28157 void
28158 arm_emit_eabi_attribute (const char *name, int num, int val)
28160 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28161 if (flag_verbose_asm || flag_debug_asm)
28162 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28163 asm_fprintf (asm_out_file, "\n");
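/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   emits
	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding
   when -fverbose-asm is in effect, and just the numeric form otherwise
   (@ being the ARM assembler comment start).  */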
28166 static void
28167 arm_file_start (void)
28169 int val;
28171 if (TARGET_UNIFIED_ASM)
28172 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28174 if (TARGET_BPABI)
28176 const char *fpu_name;
28177 if (arm_selected_arch)
28179 /* armv7ve doesn't support any extensions. */
28180 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28182 /* Keep backward compatibility for assemblers
28183 which don't support armv7ve. */
28184 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28185 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28186 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28187 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28188 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28190 else
28192 const char* pos = strchr (arm_selected_arch->name, '+');
28193 if (pos)
28195 char buf[15];
28196 gcc_assert (strlen (arm_selected_arch->name)
28197 <= sizeof (buf) / sizeof (*pos));
28198 strncpy (buf, arm_selected_arch->name,
28199 (pos - arm_selected_arch->name) * sizeof (*pos));
28200 buf[pos - arm_selected_arch->name] = '\0';
28201 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28202 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28204 else
28205 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28208 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28209 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28210 else
28212 const char* truncated_name
28213 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28214 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28217 if (TARGET_SOFT_FLOAT)
28219 fpu_name = "softvfp";
28221 else
28223 fpu_name = arm_fpu_desc->name;
28224 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28226 if (TARGET_HARD_FLOAT)
28227 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28228 if (TARGET_HARD_FLOAT_ABI)
28229 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28232 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28234 /* Some of these attributes only apply when the corresponding features
28235 are used. However we don't have any easy way of figuring this out.
28236 Conservatively record the setting that would have been used. */
28238 if (flag_rounding_math)
28239 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28241 if (!flag_unsafe_math_optimizations)
28243 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28244 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28246 if (flag_signaling_nans)
28247 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28249 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28250 flag_finite_math_only ? 1 : 3);
28252 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28253 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28254 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28255 flag_short_enums ? 1 : 2);
28257 /* Tag_ABI_optimization_goals. */
28258 if (optimize_size)
28259 val = 4;
28260 else if (optimize >= 2)
28261 val = 2;
28262 else if (optimize)
28263 val = 1;
28264 else
28265 val = 6;
28266 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28268 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28269 unaligned_access);
28271 if (arm_fp16_format)
28272 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28273 (int) arm_fp16_format);
28275 if (arm_lang_output_object_attributes_hook)
28276 arm_lang_output_object_attributes_hook();
28279 default_file_start ();
28282 static void
28283 arm_file_end (void)
28285 int regno;
28287 if (NEED_INDICATE_EXEC_STACK)
28288 /* Add .note.GNU-stack. */
28289 file_end_indicate_exec_stack ();
28291 if (! thumb_call_reg_needed)
28292 return;
28294 switch_to_section (text_section);
28295 asm_fprintf (asm_out_file, "\t.code 16\n");
28296 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28298 for (regno = 0; regno < LR_REGNUM; regno++)
28300 rtx label = thumb_call_via_label[regno];
28302 if (label != 0)
28304 targetm.asm_out.internal_label (asm_out_file, "L",
28305 CODE_LABEL_NUMBER (label));
28306 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28311 #ifndef ARM_PE
28312 /* Symbols in the text segment can be accessed without indirecting via the
28313 constant pool; it may take an extra binary operation, but this is still
28314 faster than indirecting via memory. Don't do this when not optimizing,
28315 since we won't be calculating all of the offsets necessary to do this
28316 simplification. */
28318 static void
28319 arm_encode_section_info (tree decl, rtx rtl, int first)
28321 if (optimize > 0 && TREE_CONSTANT (decl))
28322 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28324 default_encode_section_info (decl, rtl, first);
28326 #endif /* !ARM_PE */
28328 static void
28329 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28331 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28332 && !strcmp (prefix, "L"))
28334 arm_ccfsm_state = 0;
28335 arm_target_insn = NULL;
28337 default_internal_label (stream, prefix, labelno);
28340 /* Output code to add DELTA to the first argument, and then jump
28341 to FUNCTION. Used for C++ multiple inheritance. */
28342 static void
28343 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28344 HOST_WIDE_INT delta,
28345 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28346 tree function)
28348 static int thunk_label = 0;
28349 char label[256];
28350 char labelpc[256];
28351 int mi_delta = delta;
28352 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28353 int shift = 0;
28354 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28355 ? 1 : 0);
28356 if (mi_delta < 0)
28357 mi_delta = - mi_delta;
28359 final_start_function (emit_barrier (), file, 1);
28361 if (TARGET_THUMB1)
28363 int labelno = thunk_label++;
28364 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28365 /* Thunks are entered in ARM mode when available. */
28366 if (TARGET_THUMB1_ONLY)
28368 /* push r3 so we can use it as a temporary. */
28369 /* TODO: Omit this save if r3 is not used. */
28370 fputs ("\tpush {r3}\n", file);
28371 fputs ("\tldr\tr3, ", file);
28373 else
28375 fputs ("\tldr\tr12, ", file);
28377 assemble_name (file, label);
28378 fputc ('\n', file);
28379 if (flag_pic)
28381 /* If we are generating PIC, the ldr instruction below loads
28382 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28383 the address of the add + 8, so we have:
28385 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28386 = target + 1.
28388 Note that we have "+ 1" because some versions of GNU ld
28389 don't set the low bit of the result for R_ARM_REL32
28390 relocations against thumb function symbols.
28391 On ARMv6M this is +4, not +8. */
28392 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28393 assemble_name (file, labelpc);
28394 fputs (":\n", file);
28395 if (TARGET_THUMB1_ONLY)
28397 /* This is 2 insns after the start of the thunk, so we know it
28398 is 4-byte aligned. */
28399 fputs ("\tadd\tr3, pc, r3\n", file);
28400 fputs ("\tmov r12, r3\n", file);
28402 else
28403 fputs ("\tadd\tr12, pc, r12\n", file);
28405 else if (TARGET_THUMB1_ONLY)
28406 fputs ("\tmov r12, r3\n", file);
28408 if (TARGET_THUMB1_ONLY)
28410 if (mi_delta > 255)
28412 fputs ("\tldr\tr3, ", file);
28413 assemble_name (file, label);
28414 fputs ("+4\n", file);
28415 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28416 mi_op, this_regno, this_regno);
28418 else if (mi_delta != 0)
28420 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28421 mi_op, this_regno, this_regno,
28422 mi_delta);
28425 else
28427 /* TODO: Use movw/movt for large constants when available. */
28428 while (mi_delta != 0)
28430 if ((mi_delta & (3 << shift)) == 0)
28431 shift += 2;
28432 else
28434 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28435 mi_op, this_regno, this_regno,
28436 mi_delta & (0xff << shift));
28437 mi_delta &= ~(0xff << shift);
28438 shift += 8;
28442 if (TARGET_THUMB1)
28444 if (TARGET_THUMB1_ONLY)
28445 fputs ("\tpop\t{r3}\n", file);
28447 fprintf (file, "\tbx\tr12\n");
28448 ASM_OUTPUT_ALIGN (file, 2);
28449 assemble_name (file, label);
28450 fputs (":\n", file);
28451 if (flag_pic)
28453 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28454 rtx tem = XEXP (DECL_RTL (function), 0);
28455 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28456 pipeline offset is four rather than eight. Adjust the offset
28457 accordingly. */
28458 tem = plus_constant (GET_MODE (tem), tem,
28459 TARGET_THUMB1_ONLY ? -3 : -7);
28460 tem = gen_rtx_MINUS (GET_MODE (tem),
28461 tem,
28462 gen_rtx_SYMBOL_REF (Pmode,
28463 ggc_strdup (labelpc)));
28464 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28466 else
28467 /* Output ".word .LTHUNKn". */
28468 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28470 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28471 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28473 else
28475 fputs ("\tb\t", file);
28476 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28477 if (NEED_PLT_RELOC)
28478 fputs ("(PLT)", file);
28479 fputc ('\n', file);
28482 final_end_function ();
28486 arm_emit_vector_const (FILE *file, rtx x)
28488 int i;
28489 const char * pattern;
28491 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28493 switch (GET_MODE (x))
28495 case V2SImode: pattern = "%08x"; break;
28496 case V4HImode: pattern = "%04x"; break;
28497 case V8QImode: pattern = "%02x"; break;
28498 default: gcc_unreachable ();
28501 fprintf (file, "0x");
28502 for (i = CONST_VECTOR_NUNITS (x); i--;)
28504 rtx element;
28506 element = CONST_VECTOR_ELT (x, i);
28507 fprintf (file, pattern, INTVAL (element));
28510 return 1;
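/* Worked example: a V4HImode constant holding {1, 2, 3, 4} (element 0
   first) is printed as 0x0004000300020001, i.e. "0x" followed by each
   element from the highest-numbered down, using the per-element width
   selected above.  */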
28513 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28514 HFmode constant pool entries are actually loaded with ldr. */
28515 void
28516 arm_emit_fp16_const (rtx c)
28518 REAL_VALUE_TYPE r;
28519 long bits;
28521 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28522 bits = real_to_target (NULL, &r, HFmode);
28523 if (WORDS_BIG_ENDIAN)
28524 assemble_zeros (2);
28525 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28526 if (!WORDS_BIG_ENDIAN)
28527 assemble_zeros (2);
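/* Conceptually (assuming the IEEE half-precision format), emitting the
   HFmode constant 1.0, whose bit pattern is 0x3C00, on a little-endian
   target produces the two data bytes of the value followed by two bytes
   of zero padding, so the word can be fetched with an ordinary ldr.  */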
28530 const char *
28531 arm_output_load_gr (rtx *operands)
28533 rtx reg;
28534 rtx offset;
28535 rtx wcgr;
28536 rtx sum;
28538 if (!MEM_P (operands [1])
28539 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28540 || !REG_P (reg = XEXP (sum, 0))
28541 || !CONST_INT_P (offset = XEXP (sum, 1))
28542 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28543 return "wldrw%?\t%0, %1";
28545 /* Fix up an out-of-range load of a GR register. */
28546 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28547 wcgr = operands[0];
28548 operands[0] = reg;
28549 output_asm_insn ("ldr%?\t%0, %1", operands);
28551 operands[0] = wcgr;
28552 operands[1] = reg;
28553 output_asm_insn ("tmcr%?\t%0, %1", operands);
28554 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28556 return "";
28559 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28561 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28562 named arg and all anonymous args onto the stack.
28563 XXX I know the prologue shouldn't be pushing registers, but it is faster
28564 that way. */
28566 static void
28567 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28568 enum machine_mode mode,
28569 tree type,
28570 int *pretend_size,
28571 int second_time ATTRIBUTE_UNUSED)
28573 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28574 int nregs;
28576 cfun->machine->uses_anonymous_args = 1;
28577 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28579 nregs = pcum->aapcs_ncrn;
28580 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28581 nregs++;
28583 else
28584 nregs = pcum->nregs;
28586 if (nregs < NUM_ARG_REGS)
28587 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
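/* Worked example: for a variadic function whose fixed arguments occupy
   r0 and r1, nregs is 2, so *pretend_size becomes (4 - 2) * 4 = 8 and
   the prologue pushes r2 and r3 so that the anonymous arguments end up
   contiguous with the named ones on the stack.  */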
28590 /* We can't rely on the caller doing the proper promotion when
28591 using APCS or ATPCS. */
28593 static bool
28594 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28596 return !TARGET_AAPCS_BASED;
28599 static enum machine_mode
28600 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28601 enum machine_mode mode,
28602 int *punsignedp ATTRIBUTE_UNUSED,
28603 const_tree fntype ATTRIBUTE_UNUSED,
28604 int for_return ATTRIBUTE_UNUSED)
28606 if (GET_MODE_CLASS (mode) == MODE_INT
28607 && GET_MODE_SIZE (mode) < 4)
28608 return SImode;
28610 return mode;
28613 /* AAPCS based ABIs use short enums by default. */
28615 static bool
28616 arm_default_short_enums (void)
28618 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28622 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28624 static bool
28625 arm_align_anon_bitfield (void)
28627 return TARGET_AAPCS_BASED;
28631 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28633 static tree
28634 arm_cxx_guard_type (void)
28636 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28640 /* The EABI says test the least significant bit of a guard variable. */
28642 static bool
28643 arm_cxx_guard_mask_bit (void)
28645 return TARGET_AAPCS_BASED;
28649 /* The EABI specifies that all array cookies are 8 bytes long. */
28651 static tree
28652 arm_get_cookie_size (tree type)
28654 tree size;
28656 if (!TARGET_AAPCS_BASED)
28657 return default_cxx_get_cookie_size (type);
28659 size = build_int_cst (sizetype, 8);
28660 return size;
28664 /* The EABI says that array cookies should also contain the element size. */
28666 static bool
28667 arm_cookie_has_size (void)
28669 return TARGET_AAPCS_BASED;
28673 /* The EABI says constructors and destructors should return a pointer to
28674 the object constructed/destroyed. */
28676 static bool
28677 arm_cxx_cdtor_returns_this (void)
28679 return TARGET_AAPCS_BASED;
28682 /* The EABI says that an inline function may never be the key
28683 method. */
28685 static bool
28686 arm_cxx_key_method_may_be_inline (void)
28688 return !TARGET_AAPCS_BASED;
28691 static void
28692 arm_cxx_determine_class_data_visibility (tree decl)
28694 if (!TARGET_AAPCS_BASED
28695 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28696 return;
28698 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28699 is exported. However, on systems without dynamic vague linkage,
28700 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28701 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28702 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28703 else
28704 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28705 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28708 static bool
28709 arm_cxx_class_data_always_comdat (void)
28711 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28712 vague linkage if the class has no key function. */
28713 return !TARGET_AAPCS_BASED;
28717 /* The EABI says __aeabi_atexit should be used to register static
28718 destructors. */
28720 static bool
28721 arm_cxx_use_aeabi_atexit (void)
28723 return TARGET_AAPCS_BASED;
28727 void
28728 arm_set_return_address (rtx source, rtx scratch)
28730 arm_stack_offsets *offsets;
28731 HOST_WIDE_INT delta;
28732 rtx addr;
28733 unsigned long saved_regs;
28735 offsets = arm_get_frame_offsets ();
28736 saved_regs = offsets->saved_regs_mask;
28738 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28739 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28740 else
28742 if (frame_pointer_needed)
28743 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28744 else
28746 /* LR will be the first saved register. */
28747 delta = offsets->outgoing_args - (offsets->frame + 4);
28750 if (delta >= 4096)
28752 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28753 GEN_INT (delta & ~4095)));
28754 addr = scratch;
28755 delta &= 4095;
28757 else
28758 addr = stack_pointer_rtx;
28760 addr = plus_constant (Pmode, addr, delta);
28762 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28767 void
28768 thumb_set_return_address (rtx source, rtx scratch)
28770 arm_stack_offsets *offsets;
28771 HOST_WIDE_INT delta;
28772 HOST_WIDE_INT limit;
28773 int reg;
28774 rtx addr;
28775 unsigned long mask;
28777 emit_use (source);
28779 offsets = arm_get_frame_offsets ();
28780 mask = offsets->saved_regs_mask;
28781 if (mask & (1 << LR_REGNUM))
28783 limit = 1024;
28784 /* Find the saved regs. */
28785 if (frame_pointer_needed)
28787 delta = offsets->soft_frame - offsets->saved_args;
28788 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28789 if (TARGET_THUMB1)
28790 limit = 128;
28792 else
28794 delta = offsets->outgoing_args - offsets->saved_args;
28795 reg = SP_REGNUM;
28797 /* Allow for the stack frame. */
28798 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28799 delta -= 16;
28800 /* The link register is always the first saved register. */
28801 delta -= 4;
28803 /* Construct the address. */
28804 addr = gen_rtx_REG (SImode, reg);
28805 if (delta > limit)
28807 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28808 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28809 addr = scratch;
28811 else
28812 addr = plus_constant (Pmode, addr, delta);
28814 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28816 else
28817 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28820 /* Implements target hook vector_mode_supported_p. */
28821 bool
28822 arm_vector_mode_supported_p (enum machine_mode mode)
28824 /* Neon also supports V2SImode, etc. listed in the clause below. */
28825 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28826 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28827 return true;
28829 if ((TARGET_NEON || TARGET_IWMMXT)
28830 && ((mode == V2SImode)
28831 || (mode == V4HImode)
28832 || (mode == V8QImode)))
28833 return true;
28835 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28836 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28837 || mode == V2HAmode))
28838 return true;
28840 return false;
28843 /* Implements target hook array_mode_supported_p. */
28845 static bool
28846 arm_array_mode_supported_p (enum machine_mode mode,
28847 unsigned HOST_WIDE_INT nelems)
28849 if (TARGET_NEON
28850 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28851 && (nelems >= 2 && nelems <= 4))
28852 return true;
28854 return false;
28857 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28858 registers when autovectorizing for Neon, at least until multiple vector
28859 widths are supported properly by the middle-end. */
28861 static enum machine_mode
28862 arm_preferred_simd_mode (enum machine_mode mode)
28864 if (TARGET_NEON)
28865 switch (mode)
28867 case SFmode:
28868 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28869 case SImode:
28870 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28871 case HImode:
28872 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28873 case QImode:
28874 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28875 case DImode:
28876 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28877 return V2DImode;
28878 break;
28880 default:;
28883 if (TARGET_REALLY_IWMMXT)
28884 switch (mode)
28886 case SImode:
28887 return V2SImode;
28888 case HImode:
28889 return V4HImode;
28890 case QImode:
28891 return V8QImode;
28893 default:;
28896 return word_mode;
28899 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28901 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28902 using r0-r4 for function arguments and r7 for the stack frame, and not having
28903 enough left over to do doubleword arithmetic. For Thumb-2 all the
28904 potentially problematic instructions accept high registers so this is not
28905 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28906 that require many low registers. */
28907 static bool
28908 arm_class_likely_spilled_p (reg_class_t rclass)
28910 if ((TARGET_THUMB1 && rclass == LO_REGS)
28911 || rclass == CC_REG)
28912 return true;
28914 return false;
28917 /* Implements target hook small_register_classes_for_mode_p. */
28918 bool
28919 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28921 return TARGET_THUMB1;
28924 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28925 ARM insns and therefore guarantee that the shift count is modulo 256.
28926 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28927 guarantee no particular behavior for out-of-range counts. */
28929 static unsigned HOST_WIDE_INT
28930 arm_shift_truncation_mask (enum machine_mode mode)
28932 return mode == SImode ? 255 : 0;
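/* Illustrative note (an addition, not in the original source): because the
   mask is 255 for SImode, the middle-end may assume an SImode shift count is
   already reduced modulo 256 and drop an explicit (and:SI count (const_int 255))
   on the count; the 0 returned for other modes forbids that assumption for
   DImode shifts.  */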
28936 /* Map internal gcc register numbers to DWARF2 register numbers. */
28938 unsigned int
28939 arm_dbx_register_number (unsigned int regno)
28941 if (regno < 16)
28942 return regno;
28944 if (IS_VFP_REGNUM (regno))
28946 /* See comment in arm_dwarf_register_span. */
28947 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28948 return 64 + regno - FIRST_VFP_REGNUM;
28949 else
28950 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28953 if (IS_IWMMXT_GR_REGNUM (regno))
28954 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28956 if (IS_IWMMXT_REGNUM (regno))
28957 return 112 + regno - FIRST_IWMMXT_REGNUM;
28959 gcc_unreachable ();
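/* Worked example (an illustrative addition, not from the original source):
   with the encodings above, the single-precision register s0 (regno
   FIRST_VFP_REGNUM) maps to DWARF register 64, while d16 -- which has no
   single-precision alias and so fails VFP_REGNO_OK_FOR_SINGLE -- maps to
   256 + 16 = 272.  */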
28962 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28963 GCC models them as 64 32-bit registers, so we need to describe this to
28964 the DWARF generation code. Other registers can use the default. */
28965 static rtx
28966 arm_dwarf_register_span (rtx rtl)
28968 enum machine_mode mode;
28969 unsigned regno;
28970 rtx parts[16];
28971 int nregs;
28972 int i;
28974 regno = REGNO (rtl);
28975 if (!IS_VFP_REGNUM (regno))
28976 return NULL_RTX;
28978 /* XXX FIXME: The EABI defines two VFP register ranges:
28979 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28980 256-287: D0-D31
28981 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28982 corresponding D register. Until GDB supports this, we shall use the
28983 legacy encodings. We also use these encodings for D0-D15 for
28984 compatibility with older debuggers. */
28985 mode = GET_MODE (rtl);
28986 if (GET_MODE_SIZE (mode) < 8)
28987 return NULL_RTX;
28989 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28991 nregs = GET_MODE_SIZE (mode) / 4;
28992 for (i = 0; i < nregs; i += 2)
28993 if (TARGET_BIG_END)
28995 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28996 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28998 else
29000 parts[i] = gen_rtx_REG (SImode, regno + i);
29001 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29004 else
29006 nregs = GET_MODE_SIZE (mode) / 8;
29007 for (i = 0; i < nregs; i++)
29008 parts[i] = gen_rtx_REG (DImode, regno + i);
29011 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29014 #if ARM_UNWIND_INFO
29015 /* Emit unwind directives for a store-multiple instruction or stack pointer
29016 push during alignment.
29017 These should only ever be generated by the function prologue code, so
29018 expect them to have a particular form.
29019 The store-multiple instruction sometimes pushes pc as the last register,
29020 although it should not be tracked in unwind information, or for -Os
29021 sometimes pushes some dummy registers before the first register that needs
29022 to be tracked in unwind information; such dummy registers are there just
29023 to avoid separate stack adjustment, and will not be restored in the
29024 epilogue. */
29026 static void
29027 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29029 int i;
29030 HOST_WIDE_INT offset;
29031 HOST_WIDE_INT nregs;
29032 int reg_size;
29033 unsigned reg;
29034 unsigned lastreg;
29035 unsigned padfirst = 0, padlast = 0;
29036 rtx e;
29038 e = XVECEXP (p, 0, 0);
29039 gcc_assert (GET_CODE (e) == SET);
29041 /* First insn will adjust the stack pointer. */
29042 gcc_assert (GET_CODE (e) == SET
29043 && REG_P (SET_DEST (e))
29044 && REGNO (SET_DEST (e)) == SP_REGNUM
29045 && GET_CODE (SET_SRC (e)) == PLUS);
29047 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29048 nregs = XVECLEN (p, 0) - 1;
29049 gcc_assert (nregs);
29051 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29052 if (reg < 16)
29054 /* For -Os dummy registers can be pushed at the beginning to
29055 avoid separate stack pointer adjustment. */
29056 e = XVECEXP (p, 0, 1);
29057 e = XEXP (SET_DEST (e), 0);
29058 if (GET_CODE (e) == PLUS)
29059 padfirst = INTVAL (XEXP (e, 1));
29060 gcc_assert (padfirst == 0 || optimize_size);
29061 /* The function prologue may also push pc, but not annotate it as it is
29062 never restored. We turn this into a stack pointer adjustment. */
29063 e = XVECEXP (p, 0, nregs);
29064 e = XEXP (SET_DEST (e), 0);
29065 if (GET_CODE (e) == PLUS)
29066 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29067 else
29068 padlast = offset - 4;
29069 gcc_assert (padlast == 0 || padlast == 4);
29070 if (padlast == 4)
29071 fprintf (asm_out_file, "\t.pad #4\n");
29072 reg_size = 4;
29073 fprintf (asm_out_file, "\t.save {");
29075 else if (IS_VFP_REGNUM (reg))
29077 reg_size = 8;
29078 fprintf (asm_out_file, "\t.vsave {");
29080 else
29081 /* Unknown register type. */
29082 gcc_unreachable ();
29084 /* If the stack increment doesn't match the size of the saved registers,
29085 something has gone horribly wrong. */
29086 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29088 offset = padfirst;
29089 lastreg = 0;
29090 /* The remaining insns will describe the stores. */
29091 for (i = 1; i <= nregs; i++)
29093 /* Expect (set (mem <addr>) (reg)).
29094 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29095 e = XVECEXP (p, 0, i);
29096 gcc_assert (GET_CODE (e) == SET
29097 && MEM_P (SET_DEST (e))
29098 && REG_P (SET_SRC (e)));
29100 reg = REGNO (SET_SRC (e));
29101 gcc_assert (reg >= lastreg);
29103 if (i != 1)
29104 fprintf (asm_out_file, ", ");
29105 /* We can't use %r for vfp because we need to use the
29106 double precision register names. */
29107 if (IS_VFP_REGNUM (reg))
29108 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29109 else
29110 asm_fprintf (asm_out_file, "%r", reg);
29112 #ifdef ENABLE_CHECKING
29113 /* Check that the addresses are consecutive. */
29114 e = XEXP (SET_DEST (e), 0);
29115 if (GET_CODE (e) == PLUS)
29116 gcc_assert (REG_P (XEXP (e, 0))
29117 && REGNO (XEXP (e, 0)) == SP_REGNUM
29118 && CONST_INT_P (XEXP (e, 1))
29119 && offset == INTVAL (XEXP (e, 1)));
29120 else
29121 gcc_assert (i == 1
29122 && REG_P (e)
29123 && REGNO (e) == SP_REGNUM);
29124 offset += reg_size;
29125 #endif
29127 fprintf (asm_out_file, "}\n");
29128 if (padfirst)
29129 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
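/* Illustrative example (added for clarity, not part of the original source):
   for a prologue store-multiple such as "push {r4, r5, lr}" the code above
   emits the unwind directive ".save {r4, r5, lr}"; a VFP "vpush {d8, d9}"
   would instead produce ".vsave {d8, d9}".  */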
29132 /* Emit unwind directives for a SET. */
29134 static void
29135 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29137 rtx e0;
29138 rtx e1;
29139 unsigned reg;
29141 e0 = XEXP (p, 0);
29142 e1 = XEXP (p, 1);
29143 switch (GET_CODE (e0))
29145 case MEM:
29146 /* Pushing a single register. */
29147 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29148 || !REG_P (XEXP (XEXP (e0, 0), 0))
29149 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29150 abort ();
29152 asm_fprintf (asm_out_file, "\t.save ");
29153 if (IS_VFP_REGNUM (REGNO (e1)))
29154 asm_fprintf(asm_out_file, "{d%d}\n",
29155 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29156 else
29157 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29158 break;
29160 case REG:
29161 if (REGNO (e0) == SP_REGNUM)
29163 /* A stack increment. */
29164 if (GET_CODE (e1) != PLUS
29165 || !REG_P (XEXP (e1, 0))
29166 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29167 || !CONST_INT_P (XEXP (e1, 1)))
29168 abort ();
29170 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29171 -INTVAL (XEXP (e1, 1)));
29173 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29175 HOST_WIDE_INT offset;
29177 if (GET_CODE (e1) == PLUS)
29179 if (!REG_P (XEXP (e1, 0))
29180 || !CONST_INT_P (XEXP (e1, 1)))
29181 abort ();
29182 reg = REGNO (XEXP (e1, 0));
29183 offset = INTVAL (XEXP (e1, 1));
29184 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29185 HARD_FRAME_POINTER_REGNUM, reg,
29186 offset);
29188 else if (REG_P (e1))
29190 reg = REGNO (e1);
29191 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29192 HARD_FRAME_POINTER_REGNUM, reg);
29194 else
29195 abort ();
29197 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29199 /* Move from sp to reg. */
29200 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29202 else if (GET_CODE (e1) == PLUS
29203 && REG_P (XEXP (e1, 0))
29204 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29205 && CONST_INT_P (XEXP (e1, 1)))
29207 /* Set reg to offset from sp. */
29208 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29209 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29211 else
29212 abort ();
29213 break;
29215 default:
29216 abort ();
29221 /* Emit unwind directives for the given insn. */
29223 static void
29224 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29226 rtx note, pat;
29227 bool handled_one = false;
29229 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29230 return;
29232 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29233 && (TREE_NOTHROW (current_function_decl)
29234 || crtl->all_throwers_are_sibcalls))
29235 return;
29237 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29238 return;
29240 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29242 switch (REG_NOTE_KIND (note))
29244 case REG_FRAME_RELATED_EXPR:
29245 pat = XEXP (note, 0);
29246 goto found;
29248 case REG_CFA_REGISTER:
29249 pat = XEXP (note, 0);
29250 if (pat == NULL)
29252 pat = PATTERN (insn);
29253 if (GET_CODE (pat) == PARALLEL)
29254 pat = XVECEXP (pat, 0, 0);
29257 /* Only emitted for IS_STACKALIGN re-alignment. */
29259 rtx dest, src;
29260 unsigned reg;
29262 src = SET_SRC (pat);
29263 dest = SET_DEST (pat);
29265 gcc_assert (src == stack_pointer_rtx);
29266 reg = REGNO (dest);
29267 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29268 reg + 0x90, reg);
29270 handled_one = true;
29271 break;
29273 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29274 to get correct DWARF information for shrink-wrapping. We should not
29275 emit unwind information for it because these notes are used either for
29276 pretend arguments or to adjust sp and restore registers from the
29277 stack. */
29278 case REG_CFA_DEF_CFA:
29279 case REG_CFA_ADJUST_CFA:
29280 case REG_CFA_RESTORE:
29281 return;
29283 case REG_CFA_EXPRESSION:
29284 case REG_CFA_OFFSET:
29285 /* ??? Only handling here what we actually emit. */
29286 gcc_unreachable ();
29288 default:
29289 break;
29292 if (handled_one)
29293 return;
29294 pat = PATTERN (insn);
29295 found:
29297 switch (GET_CODE (pat))
29299 case SET:
29300 arm_unwind_emit_set (asm_out_file, pat);
29301 break;
29303 case SEQUENCE:
29304 /* Store multiple. */
29305 arm_unwind_emit_sequence (asm_out_file, pat);
29306 break;
29308 default:
29309 abort();
29314 /* Output a reference from a function exception table to the type_info
29315 object X. The EABI specifies that the symbol should be relocated by
29316 an R_ARM_TARGET2 relocation. */
29318 static bool
29319 arm_output_ttype (rtx x)
29321 fputs ("\t.word\t", asm_out_file);
29322 output_addr_const (asm_out_file, x);
29323 /* Use special relocations for symbol references. */
29324 if (!CONST_INT_P (x))
29325 fputs ("(TARGET2)", asm_out_file);
29326 fputc ('\n', asm_out_file);
29328 return TRUE;
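/* Example of the output (illustrative, assuming a hypothetical type_info
   symbol _ZTI1A): the function above emits ".word _ZTI1A(TARGET2)", leaving
   the final address to the R_ARM_TARGET2 relocation at link time.  */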
29331 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29333 static void
29334 arm_asm_emit_except_personality (rtx personality)
29336 fputs ("\t.personality\t", asm_out_file);
29337 output_addr_const (asm_out_file, personality);
29338 fputc ('\n', asm_out_file);
29341 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29343 static void
29344 arm_asm_init_sections (void)
29346 exception_section = get_unnamed_section (0, output_section_asm_op,
29347 "\t.handlerdata");
29349 #endif /* ARM_UNWIND_INFO */
29351 /* Output unwind directives for the start/end of a function. */
29353 void
29354 arm_output_fn_unwind (FILE * f, bool prologue)
29356 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29357 return;
29359 if (prologue)
29360 fputs ("\t.fnstart\n", f);
29361 else
29363 /* If this function will never be unwound, then mark it as such.
29364 The same condition is used in arm_unwind_emit to suppress
29365 the frame annotations. */
29366 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29367 && (TREE_NOTHROW (current_function_decl)
29368 || crtl->all_throwers_are_sibcalls))
29369 fputs("\t.cantunwind\n", f);
29371 fputs ("\t.fnend\n", f);
29375 static bool
29376 arm_emit_tls_decoration (FILE *fp, rtx x)
29378 enum tls_reloc reloc;
29379 rtx val;
29381 val = XVECEXP (x, 0, 0);
29382 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29384 output_addr_const (fp, val);
29386 switch (reloc)
29388 case TLS_GD32:
29389 fputs ("(tlsgd)", fp);
29390 break;
29391 case TLS_LDM32:
29392 fputs ("(tlsldm)", fp);
29393 break;
29394 case TLS_LDO32:
29395 fputs ("(tlsldo)", fp);
29396 break;
29397 case TLS_IE32:
29398 fputs ("(gottpoff)", fp);
29399 break;
29400 case TLS_LE32:
29401 fputs ("(tpoff)", fp);
29402 break;
29403 case TLS_DESCSEQ:
29404 fputs ("(tlsdesc)", fp);
29405 break;
29406 default:
29407 gcc_unreachable ();
29410 switch (reloc)
29412 case TLS_GD32:
29413 case TLS_LDM32:
29414 case TLS_IE32:
29415 case TLS_DESCSEQ:
29416 fputs (" + (. - ", fp);
29417 output_addr_const (fp, XVECEXP (x, 0, 2));
29418 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29419 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29420 output_addr_const (fp, XVECEXP (x, 0, 3));
29421 fputc (')', fp);
29422 break;
29423 default:
29424 break;
29427 return TRUE;
29430 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29432 static void
29433 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29435 gcc_assert (size == 4);
29436 fputs ("\t.word\t", file);
29437 output_addr_const (file, x);
29438 fputs ("(tlsldo)", file);
29441 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29443 static bool
29444 arm_output_addr_const_extra (FILE *fp, rtx x)
29446 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29447 return arm_emit_tls_decoration (fp, x);
29448 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29450 char label[256];
29451 int labelno = INTVAL (XVECEXP (x, 0, 0));
29453 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29454 assemble_name_raw (fp, label);
29456 return TRUE;
29458 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29460 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29461 if (GOT_PCREL)
29462 fputs ("+.", fp);
29463 fputs ("-(", fp);
29464 output_addr_const (fp, XVECEXP (x, 0, 0));
29465 fputc (')', fp);
29466 return TRUE;
29468 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29470 output_addr_const (fp, XVECEXP (x, 0, 0));
29471 if (GOT_PCREL)
29472 fputs ("+.", fp);
29473 fputs ("-(", fp);
29474 output_addr_const (fp, XVECEXP (x, 0, 1));
29475 fputc (')', fp);
29476 return TRUE;
29478 else if (GET_CODE (x) == CONST_VECTOR)
29479 return arm_emit_vector_const (fp, x);
29481 return FALSE;
29484 /* Output assembly for a shift instruction.
29485 SET_FLAGS determines how the instruction modifies the condition codes.
29486 0 - Do not set condition codes.
29487 1 - Set condition codes.
29488 2 - Use smallest instruction. */
29489 const char *
29490 arm_output_shift(rtx * operands, int set_flags)
29492 char pattern[100];
29493 static const char flag_chars[3] = {'?', '.', '!'};
29494 const char *shift;
29495 HOST_WIDE_INT val;
29496 char c;
29498 c = flag_chars[set_flags];
29499 if (TARGET_UNIFIED_ASM)
29501 shift = shift_op(operands[3], &val);
29502 if (shift)
29504 if (val != -1)
29505 operands[2] = GEN_INT(val);
29506 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29508 else
29509 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29511 else
29512 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29513 output_asm_insn (pattern, operands);
29514 return "";
29517 /* Output assembly for a WMMX immediate shift instruction. */
29518 const char *
29519 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29521 int shift = INTVAL (operands[2]);
29522 char templ[50];
29523 enum machine_mode opmode = GET_MODE (operands[0]);
29525 gcc_assert (shift >= 0);
29527 /* Handle the case where the shift value exceeds the maximum for the mode:
29528 > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H qualifier). */
29529 if (((opmode == V4HImode) && (shift > 15))
29530 || ((opmode == V2SImode) && (shift > 31))
29531 || ((opmode == DImode) && (shift > 63)))
29533 if (wror_or_wsra)
29535 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29536 output_asm_insn (templ, operands);
29537 if (opmode == DImode)
29539 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29540 output_asm_insn (templ, operands);
29543 else
29545 /* The destination register will contain all zeros. */
29546 sprintf (templ, "wzero\t%%0");
29547 output_asm_insn (templ, operands);
29549 return "";
29552 if ((opmode == DImode) && (shift > 32))
29554 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29555 output_asm_insn (templ, operands);
29556 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29557 output_asm_insn (templ, operands);
29559 else
29561 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29562 output_asm_insn (templ, operands);
29564 return "";
29567 /* Output assembly for a WMMX tinsr instruction. */
29568 const char *
29569 arm_output_iwmmxt_tinsr (rtx *operands)
29571 int mask = INTVAL (operands[3]);
29572 int i;
29573 char templ[50];
29574 int units = mode_nunits[GET_MODE (operands[0])];
29575 gcc_assert ((mask & (mask - 1)) == 0);
29576 for (i = 0; i < units; ++i)
29578 if ((mask & 0x01) == 1)
29580 break;
29582 mask >>= 1;
29584 gcc_assert (i < units);
29586 switch (GET_MODE (operands[0]))
29588 case V8QImode:
29589 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29590 break;
29591 case V4HImode:
29592 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29593 break;
29594 case V2SImode:
29595 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29596 break;
29597 default:
29598 gcc_unreachable ();
29599 break;
29601 output_asm_insn (templ, operands);
29603 return "";
29606 /* Output a Thumb-1 casesi dispatch sequence. */
29607 const char *
29608 thumb1_output_casesi (rtx *operands)
29610 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29612 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29614 switch (GET_MODE(diff_vec))
29616 case QImode:
29617 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29618 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29619 case HImode:
29620 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29621 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29622 case SImode:
29623 return "bl\t%___gnu_thumb1_case_si";
29624 default:
29625 gcc_unreachable ();
29629 /* Output a Thumb-2 casesi instruction. */
29630 const char *
29631 thumb2_output_casesi (rtx *operands)
29633 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29635 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29637 output_asm_insn ("cmp\t%0, %1", operands);
29638 output_asm_insn ("bhi\t%l3", operands);
29639 switch (GET_MODE(diff_vec))
29641 case QImode:
29642 return "tbb\t[%|pc, %0]";
29643 case HImode:
29644 return "tbh\t[%|pc, %0, lsl #1]";
29645 case SImode:
29646 if (flag_pic)
29648 output_asm_insn ("adr\t%4, %l2", operands);
29649 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29650 output_asm_insn ("add\t%4, %4, %5", operands);
29651 return "bx\t%4";
29653 else
29655 output_asm_insn ("adr\t%4, %l2", operands);
29656 return "ldr\t%|pc, [%4, %0, lsl #2]";
29658 default:
29659 gcc_unreachable ();
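/* Illustrative dispatch sequence (not from the original source): for a QImode
   difference vector the code above emits
       cmp  <index>, <bound>
       bhi  <default label>
       tbb  [pc, <index>]
   with tbh used for HImode tables and an adr/ldr (or adr/ldr/add/bx when PIC)
   sequence for SImode tables.  */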
29663 /* Most ARM cores are single issue, but some newer ones can issue two or
29664 three instructions per cycle. The scheduler descriptions rely on this being correct. */
29665 static int
29666 arm_issue_rate (void)
29668 switch (arm_tune)
29670 case cortexa15:
29671 case cortexa57:
29672 return 3;
29674 case cortexr4:
29675 case cortexr4f:
29676 case cortexr5:
29677 case genericv7a:
29678 case cortexa5:
29679 case cortexa7:
29680 case cortexa8:
29681 case cortexa9:
29682 case cortexa12:
29683 case cortexa53:
29684 case fa726te:
29685 case marvell_pj4:
29686 return 2;
29688 default:
29689 return 1;
29693 /* A table and a function to perform ARM-specific name mangling for
29694 NEON vector types in order to conform to the AAPCS (see "Procedure
29695 Call Standard for the ARM Architecture", Appendix A). To qualify
29696 for emission with the mangled names defined in that document, a
29697 vector type must not only be of the correct mode but also be
29698 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29699 typedef struct
29701 enum machine_mode mode;
29702 const char *element_type_name;
29703 const char *aapcs_name;
29704 } arm_mangle_map_entry;
29706 static arm_mangle_map_entry arm_mangle_map[] = {
29707 /* 64-bit containerized types. */
29708 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29709 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29710 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29711 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29712 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29713 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29714 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29715 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29716 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29717 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29719 /* 128-bit containerized types. */
29720 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29721 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29722 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29723 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29724 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29725 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29726 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29727 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29728 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29729 { VOIDmode, NULL, NULL }
29732 const char *
29733 arm_mangle_type (const_tree type)
29735 arm_mangle_map_entry *pos = arm_mangle_map;
29737 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29738 has to be mangled as if it is in the "std" namespace. */
29739 if (TARGET_AAPCS_BASED
29740 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29741 return "St9__va_list";
29743 /* Half-precision float. */
29744 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29745 return "Dh";
29747 if (TREE_CODE (type) != VECTOR_TYPE)
29748 return NULL;
29750 /* Check the mode of the vector type, and the name of the vector
29751 element type, against the table. */
29752 while (pos->mode != VOIDmode)
29754 tree elt_type = TREE_TYPE (type);
29756 if (pos->mode == TYPE_MODE (type)
29757 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29758 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29759 pos->element_type_name))
29760 return pos->aapcs_name;
29762 pos++;
29765 /* Use the default mangling for unrecognized (possibly user-defined)
29766 vector types. */
29767 return NULL;
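/* Worked example (an illustrative addition): a 128-bit vector of
   __builtin_neon_si elements (V4SImode, i.e. the int32x4_t of arm_neon.h)
   matches the table entry "17__simd128_int32_t", so a function
   "void f (int32x4_t)" mangles as _Z1f17__simd128_int32_t.  */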
29770 /* Order of allocation of core registers for Thumb: this allocation is
29771 written over the corresponding initial entries of the array
29772 initialized with REG_ALLOC_ORDER. We allocate all low registers
29773 first. Saving and restoring a low register is usually cheaper than
29774 using a call-clobbered high register. */
29776 static const int thumb_core_reg_alloc_order[] =
29778 3, 2, 1, 0, 4, 5, 6, 7,
29779 14, 12, 8, 9, 10, 11
29782 /* Adjust register allocation order when compiling for Thumb. */
29784 void
29785 arm_order_regs_for_local_alloc (void)
29787 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29788 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29789 if (TARGET_THUMB)
29790 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29791 sizeof (thumb_core_reg_alloc_order));
29794 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29796 bool
29797 arm_frame_pointer_required (void)
29799 return (cfun->has_nonlocal_label
29800 || SUBTARGET_FRAME_POINTER_REQUIRED
29801 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29804 /* Only Thumb-1 lacks support for conditional execution, so return true if
29805 the target is not Thumb-1. */
29806 static bool
29807 arm_have_conditional_execution (void)
29809 return !TARGET_THUMB1;
29812 tree
29813 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29815 enum machine_mode in_mode, out_mode;
29816 int in_n, out_n;
29818 if (TREE_CODE (type_out) != VECTOR_TYPE
29819 || TREE_CODE (type_in) != VECTOR_TYPE)
29820 return NULL_TREE;
29822 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29823 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29824 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29825 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29827 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29828 decl of the vectorized builtin for the appropriate vector mode.
29829 NULL_TREE is returned if no such builtin is available. */
29830 #undef ARM_CHECK_BUILTIN_MODE
29831 #define ARM_CHECK_BUILTIN_MODE(C) \
29832 (TARGET_NEON && TARGET_FPU_ARMV8 \
29833 && flag_unsafe_math_optimizations \
29834 && ARM_CHECK_BUILTIN_MODE_1 (C))
29836 #undef ARM_CHECK_BUILTIN_MODE_1
29837 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29838 (out_mode == SFmode && out_n == C \
29839 && in_mode == SFmode && in_n == C)
29841 #undef ARM_FIND_VRINT_VARIANT
29842 #define ARM_FIND_VRINT_VARIANT(N) \
29843 (ARM_CHECK_BUILTIN_MODE (2) \
29844 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29845 : (ARM_CHECK_BUILTIN_MODE (4) \
29846 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29847 : NULL_TREE))
29849 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29851 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29852 switch (fn)
29854 case BUILT_IN_FLOORF:
29855 return ARM_FIND_VRINT_VARIANT (vrintm);
29856 case BUILT_IN_CEILF:
29857 return ARM_FIND_VRINT_VARIANT (vrintp);
29858 case BUILT_IN_TRUNCF:
29859 return ARM_FIND_VRINT_VARIANT (vrintz);
29860 case BUILT_IN_ROUNDF:
29861 return ARM_FIND_VRINT_VARIANT (vrinta);
29862 #undef ARM_CHECK_BUILTIN_MODE
29863 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29864 (out_mode == N##Imode && out_n == C \
29865 && in_mode == N##Imode && in_n == C)
29866 case BUILT_IN_BSWAP16:
29867 if (ARM_CHECK_BUILTIN_MODE (4, H))
29868 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
29869 else if (ARM_CHECK_BUILTIN_MODE (8, H))
29870 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
29871 else
29872 return NULL_TREE;
29873 case BUILT_IN_BSWAP32:
29874 if (ARM_CHECK_BUILTIN_MODE (2, S))
29875 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
29876 else if (ARM_CHECK_BUILTIN_MODE (4, S))
29877 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
29878 else
29879 return NULL_TREE;
29880 case BUILT_IN_BSWAP64:
29881 if (ARM_CHECK_BUILTIN_MODE (2, D))
29882 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
29883 else
29884 return NULL_TREE;
29886 default:
29887 return NULL_TREE;
29890 return NULL_TREE;
29892 #undef ARM_CHECK_BUILTIN_MODE
29893 #undef ARM_FIND_VRINT_VARIANT
29895 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29896 static HOST_WIDE_INT
29897 arm_vector_alignment (const_tree type)
29899 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29901 if (TARGET_AAPCS_BASED)
29902 align = MIN (align, 64);
29904 return align;
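/* Illustrative consequence (not in the original source): under AAPCS a
   16-byte vector such as int32x4_t reports an alignment of only 64 bits
   (8 bytes), even though its size is 128 bits.  */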
29907 static unsigned int
29908 arm_autovectorize_vector_sizes (void)
29910 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
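/* Illustrative note (added for clarity): the value 16 | 8 is a bitmask of
   vector sizes in bytes, telling the autovectorizer to try 128-bit Neon
   vectors first and fall back to 64-bit ones; returning 0 restricts it to
   the single mode chosen by arm_preferred_simd_mode.  */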
29913 static bool
29914 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29916 /* Vectors which aren't in packed structures will not be less aligned than
29917 the natural alignment of their element type, so this is safe. */
29918 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29919 return !is_packed;
29921 return default_builtin_vector_alignment_reachable (type, is_packed);
29924 static bool
29925 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29926 const_tree type, int misalignment,
29927 bool is_packed)
29929 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29931 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29933 if (is_packed)
29934 return align == 1;
29936 /* If the misalignment is unknown, we should be able to handle the access
29937 so long as it is not to a member of a packed data structure. */
29938 if (misalignment == -1)
29939 return true;
29941 /* Return true if the misalignment is a multiple of the natural alignment
29942 of the vector's element type. This is probably always going to be
29943 true in practice, since we've already established that this isn't a
29944 packed access. */
29945 return ((misalignment % align) == 0);
29948 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29949 is_packed);
29952 static void
29953 arm_conditional_register_usage (void)
29955 int regno;
29957 if (TARGET_THUMB1 && optimize_size)
29959 /* When optimizing for size on Thumb-1, it's better not
29960 to use the HI regs, because of the overhead of
29961 stacking them. */
29962 for (regno = FIRST_HI_REGNUM;
29963 regno <= LAST_HI_REGNUM; ++regno)
29964 fixed_regs[regno] = call_used_regs[regno] = 1;
29967 /* The link register can be clobbered by any branch insn,
29968 but we have no way to track that at present, so mark
29969 it as unavailable. */
29970 if (TARGET_THUMB1)
29971 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29973 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29975 /* VFPv3 registers are disabled when earlier VFP
29976 versions are selected due to the definition of
29977 LAST_VFP_REGNUM. */
29978 for (regno = FIRST_VFP_REGNUM;
29979 regno <= LAST_VFP_REGNUM; ++ regno)
29981 fixed_regs[regno] = 0;
29982 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29983 || regno >= FIRST_VFP_REGNUM + 32;
29987 if (TARGET_REALLY_IWMMXT)
29989 regno = FIRST_IWMMXT_GR_REGNUM;
29990 /* The 2002/10/09 revision of the XScale ABI has wCG0
29991 and wCG1 as call-preserved registers. The 2002/11/21
29992 revision changed this so that all wCG registers are
29993 scratch registers. */
29994 for (regno = FIRST_IWMMXT_GR_REGNUM;
29995 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29996 fixed_regs[regno] = 0;
29997 /* The XScale ABI has wR0 - wR9 as scratch registers,
29998 the rest as call-preserved registers. */
29999 for (regno = FIRST_IWMMXT_REGNUM;
30000 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30002 fixed_regs[regno] = 0;
30003 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30007 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30009 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30010 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30012 else if (TARGET_APCS_STACK)
30014 fixed_regs[10] = 1;
30015 call_used_regs[10] = 1;
30017 /* -mcaller-super-interworking reserves r11 for calls to
30018 _interwork_r11_call_via_rN(). Making the register global
30019 is an easy way of ensuring that it remains valid for all
30020 calls. */
30021 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30022 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30024 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30025 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30026 if (TARGET_CALLER_INTERWORKING)
30027 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30029 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30032 static reg_class_t
30033 arm_preferred_rename_class (reg_class_t rclass)
30035 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30036 using GENERAL_REGS. During the register rename pass we therefore prefer
30037 LO_REGS, which can reduce code size. */
30038 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30039 return LO_REGS;
30040 else
30041 return NO_REGS;
30044 /* Compute the attribute "length" of insn "*push_multi".
30045 So this function MUST be kept in sync with that insn pattern. */
30047 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30049 int i, regno, hi_reg;
30050 int num_saves = XVECLEN (parallel_op, 0);
30052 /* ARM mode. */
30053 if (TARGET_ARM)
30054 return 4;
30055 /* Thumb1 mode. */
30056 if (TARGET_THUMB1)
30057 return 2;
30059 /* Thumb2 mode. */
30060 regno = REGNO (first_op);
30061 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30062 for (i = 1; i < num_saves && !hi_reg; i++)
30064 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30065 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30068 if (!hi_reg)
30069 return 2;
30070 return 4;
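/* Worked example (illustrative, not part of the original source): in Thumb-2,
   "push {r4, r5, lr}" uses only low registers plus LR and so gets the 16-bit
   encoding (length 2), whereas "push {r4, r8}" involves a high register other
   than LR and needs the 32-bit encoding (length 4).  */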
30073 /* Compute the number of instructions emitted by output_move_double. */
30075 arm_count_output_move_double_insns (rtx *operands)
30077 int count;
30078 rtx ops[2];
30079 /* output_move_double may modify the operands array, so call it
30080 here on a copy of the array. */
30081 ops[0] = operands[0];
30082 ops[1] = operands[1];
30083 output_move_double (ops, false, &count);
30084 return count;
30088 vfp3_const_double_for_fract_bits (rtx operand)
30090 REAL_VALUE_TYPE r0;
30092 if (!CONST_DOUBLE_P (operand))
30093 return 0;
30095 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30096 if (exact_real_inverse (DFmode, &r0))
30098 if (exact_real_truncate (DFmode, &r0))
30100 HOST_WIDE_INT value = real_to_integer (&r0);
30101 value = value & 0xffffffff;
30102 if ((value != 0) && ( (value & (value - 1)) == 0))
30103 return int_log2 (value);
30106 return 0;
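/* Worked example (an illustrative addition): for the constant 0.25 the exact
   inverse is 4.0, which truncates exactly to the power of two 4, so the
   function above returns int_log2 (4) = 2 -- i.e. 0.25 corresponds to 2
   fractional bits.  */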
30110 vfp3_const_double_for_bits (rtx operand)
30112 REAL_VALUE_TYPE r0;
30114 if (!CONST_DOUBLE_P (operand))
30115 return 0;
30117 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30118 if (exact_real_truncate (DFmode, &r0))
30120 HOST_WIDE_INT value = real_to_integer (&r0);
30121 value = value & 0xffffffff;
30122 if ((value != 0) && ( (value & (value - 1)) == 0))
30123 return int_log2 (value);
30126 return 0;
30129 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30131 static void
30132 arm_pre_atomic_barrier (enum memmodel model)
30134 if (need_atomic_barrier_p (model, true))
30135 emit_insn (gen_memory_barrier ());
30138 static void
30139 arm_post_atomic_barrier (enum memmodel model)
30141 if (need_atomic_barrier_p (model, false))
30142 emit_insn (gen_memory_barrier ());
30145 /* Emit the load-exclusive and store-exclusive instructions.
30146 Use acquire and release versions if necessary. */
30148 static void
30149 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30151 rtx (*gen) (rtx, rtx);
30153 if (acq)
30155 switch (mode)
30157 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30158 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30159 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30160 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30161 default:
30162 gcc_unreachable ();
30165 else
30167 switch (mode)
30169 case QImode: gen = gen_arm_load_exclusiveqi; break;
30170 case HImode: gen = gen_arm_load_exclusivehi; break;
30171 case SImode: gen = gen_arm_load_exclusivesi; break;
30172 case DImode: gen = gen_arm_load_exclusivedi; break;
30173 default:
30174 gcc_unreachable ();
30178 emit_insn (gen (rval, mem));
30181 static void
30182 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30183 rtx mem, bool rel)
30185 rtx (*gen) (rtx, rtx, rtx);
30187 if (rel)
30189 switch (mode)
30191 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30192 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30193 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30194 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30195 default:
30196 gcc_unreachable ();
30199 else
30201 switch (mode)
30203 case QImode: gen = gen_arm_store_exclusiveqi; break;
30204 case HImode: gen = gen_arm_store_exclusivehi; break;
30205 case SImode: gen = gen_arm_store_exclusivesi; break;
30206 case DImode: gen = gen_arm_store_exclusivedi; break;
30207 default:
30208 gcc_unreachable ();
30212 emit_insn (gen (bval, rval, mem));
30215 /* Mark the previous jump instruction as unlikely. */
30217 static void
30218 emit_unlikely_jump (rtx insn)
30220 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30222 insn = emit_jump_insn (insn);
30223 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30226 /* Expand a compare and swap pattern. */
30228 void
30229 arm_expand_compare_and_swap (rtx operands[])
30231 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30232 enum machine_mode mode;
30233 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30235 bval = operands[0];
30236 rval = operands[1];
30237 mem = operands[2];
30238 oldval = operands[3];
30239 newval = operands[4];
30240 is_weak = operands[5];
30241 mod_s = operands[6];
30242 mod_f = operands[7];
30243 mode = GET_MODE (mem);
30245 /* Normally the succ memory model must be stronger than fail, but in the
30246 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30247 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30249 if (TARGET_HAVE_LDACQ
30250 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30251 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30252 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30254 switch (mode)
30256 case QImode:
30257 case HImode:
30258 /* For narrow modes, we're going to perform the comparison in SImode,
30259 so do the zero-extension now. */
30260 rval = gen_reg_rtx (SImode);
30261 oldval = convert_modes (SImode, mode, oldval, true);
30262 /* FALLTHRU */
30264 case SImode:
30265 /* Force the value into a register if needed. We waited until after
30266 the zero-extension above to do this properly. */
30267 if (!arm_add_operand (oldval, SImode))
30268 oldval = force_reg (SImode, oldval);
30269 break;
30271 case DImode:
30272 if (!cmpdi_operand (oldval, mode))
30273 oldval = force_reg (mode, oldval);
30274 break;
30276 default:
30277 gcc_unreachable ();
30280 switch (mode)
30282 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30283 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30284 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30285 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30286 default:
30287 gcc_unreachable ();
30290 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30292 if (mode == QImode || mode == HImode)
30293 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30295 /* In all cases, we arrange for success to be signaled by Z set.
30296 This arrangement allows for the boolean result to be used directly
30297 in a subsequent branch, post optimization. */
30298 x = gen_rtx_REG (CCmode, CC_REGNUM);
30299 x = gen_rtx_EQ (SImode, x, const0_rtx);
30300 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30303 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30304 another memory store between the load-exclusive and store-exclusive can
30305 reset the monitor from Exclusive to Open state. This means we must wait
30306 until after reload to split the pattern, lest we get a register spill in
30307 the middle of the atomic sequence. */
30309 void
30310 arm_split_compare_and_swap (rtx operands[])
30312 rtx rval, mem, oldval, newval, scratch;
30313 enum machine_mode mode;
30314 enum memmodel mod_s, mod_f;
30315 bool is_weak;
30316 rtx label1, label2, x, cond;
30318 rval = operands[0];
30319 mem = operands[1];
30320 oldval = operands[2];
30321 newval = operands[3];
30322 is_weak = (operands[4] != const0_rtx);
30323 mod_s = (enum memmodel) INTVAL (operands[5]);
30324 mod_f = (enum memmodel) INTVAL (operands[6]);
30325 scratch = operands[7];
30326 mode = GET_MODE (mem);
30328 bool use_acquire = TARGET_HAVE_LDACQ
30329 && !(mod_s == MEMMODEL_RELAXED
30330 || mod_s == MEMMODEL_CONSUME
30331 || mod_s == MEMMODEL_RELEASE);
30333 bool use_release = TARGET_HAVE_LDACQ
30334 && !(mod_s == MEMMODEL_RELAXED
30335 || mod_s == MEMMODEL_CONSUME
30336 || mod_s == MEMMODEL_ACQUIRE);
30338 /* Checks whether a barrier is needed and emits one accordingly. */
30339 if (!(use_acquire || use_release))
30340 arm_pre_atomic_barrier (mod_s);
30342 label1 = NULL_RTX;
30343 if (!is_weak)
30345 label1 = gen_label_rtx ();
30346 emit_label (label1);
30348 label2 = gen_label_rtx ();
30350 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30352 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30353 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30354 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30355 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30356 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30358 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30360 /* Weak or strong, we want EQ to be true for success, so that we
30361 match the flags that we got from the compare above. */
30362 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30363 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30364 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30366 if (!is_weak)
30368 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30369 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30370 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30371 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30374 if (mod_f != MEMMODEL_RELAXED)
30375 emit_label (label2);
30377 /* Checks whether a barrier is needed and emits one accordingly. */
30378 if (!(use_acquire || use_release))
30379 arm_post_atomic_barrier (mod_s);
30381 if (mod_f == MEMMODEL_RELAXED)
30382 emit_label (label2);
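/* Rough shape of the emitted sequence (an illustrative sketch, assuming a
   strong SImode compare-and-swap on a target without acquire/release
   instructions, so explicit barriers are used):
         dmb                          @ arm_pre_atomic_barrier
     1:  ldrex   rval, [mem]
         cmp     rval, oldval
         bne     2f
         strex   scratch, newval, [mem]
         cmp     scratch, #0
         bne     1b
     2:  dmb                          @ arm_post_atomic_barrier  */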
30385 void
30386 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30387 rtx value, rtx model_rtx, rtx cond)
30389 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30390 enum machine_mode mode = GET_MODE (mem);
30391 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30392 rtx label, x;
30394 bool use_acquire = TARGET_HAVE_LDACQ
30395 && !(model == MEMMODEL_RELAXED
30396 || model == MEMMODEL_CONSUME
30397 || model == MEMMODEL_RELEASE);
30399 bool use_release = TARGET_HAVE_LDACQ
30400 && !(model == MEMMODEL_RELAXED
30401 || model == MEMMODEL_CONSUME
30402 || model == MEMMODEL_ACQUIRE);
30404 /* Checks whether a barrier is needed and emits one accordingly. */
30405 if (!(use_acquire || use_release))
30406 arm_pre_atomic_barrier (model);
30408 label = gen_label_rtx ();
30409 emit_label (label);
30411 if (new_out)
30412 new_out = gen_lowpart (wmode, new_out);
30413 if (old_out)
30414 old_out = gen_lowpart (wmode, old_out);
30415 else
30416 old_out = new_out;
30417 value = simplify_gen_subreg (wmode, value, mode, 0);
30419 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30421 switch (code)
30423 case SET:
30424 new_out = value;
30425 break;
30427 case NOT:
30428 x = gen_rtx_AND (wmode, old_out, value);
30429 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30430 x = gen_rtx_NOT (wmode, new_out);
30431 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30432 break;
30434 case MINUS:
30435 if (CONST_INT_P (value))
30437 value = GEN_INT (-INTVAL (value));
30438 code = PLUS;
30440 /* FALLTHRU */
30442 case PLUS:
30443 if (mode == DImode)
30445 /* DImode plus/minus need to clobber flags. */
30446 /* The adddi3 and subdi3 patterns are incorrectly written so that
30447 they require matching operands, even when we could easily support
30448 three operands. Thankfully, this can be fixed up post-splitting,
30449 as the individual add+adc patterns do accept three operands and
30450 post-reload cprop can make these moves go away. */
30451 emit_move_insn (new_out, old_out);
30452 if (code == PLUS)
30453 x = gen_adddi3 (new_out, new_out, value);
30454 else
30455 x = gen_subdi3 (new_out, new_out, value);
30456 emit_insn (x);
30457 break;
30459 /* FALLTHRU */
30461 default:
30462 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30463 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30464 break;
30467 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30468 use_release);
30470 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30471 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30473 /* Checks whether a barrier is needed and emits one accordingly. */
30474 if (!(use_acquire || use_release))
30475 arm_post_atomic_barrier (model);
30478 #define MAX_VECT_LEN 16
30480 struct expand_vec_perm_d
30482 rtx target, op0, op1;
30483 unsigned char perm[MAX_VECT_LEN];
30484 enum machine_mode vmode;
30485 unsigned char nelt;
30486 bool one_vector_p;
30487 bool testing_p;
30490 /* Generate a variable permutation. */
30492 static void
30493 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30495 enum machine_mode vmode = GET_MODE (target);
30496 bool one_vector_p = rtx_equal_p (op0, op1);
30498 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30499 gcc_checking_assert (GET_MODE (op0) == vmode);
30500 gcc_checking_assert (GET_MODE (op1) == vmode);
30501 gcc_checking_assert (GET_MODE (sel) == vmode);
30502 gcc_checking_assert (TARGET_NEON);
30504 if (one_vector_p)
30506 if (vmode == V8QImode)
30507 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30508 else
30509 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30511 else
30513 rtx pair;
30515 if (vmode == V8QImode)
30517 pair = gen_reg_rtx (V16QImode);
30518 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30519 pair = gen_lowpart (TImode, pair);
30520 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30522 else
30524 pair = gen_reg_rtx (OImode);
30525 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30526 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30531 void
30532 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30534 enum machine_mode vmode = GET_MODE (target);
30535 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30536 bool one_vector_p = rtx_equal_p (op0, op1);
30537 rtx rmask[MAX_VECT_LEN], mask;
30539 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30540 numbering of elements for big-endian, we must reverse the order. */
30541 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30543 /* The VTBL instruction does not use a modulo index, so we must take care
30544 of that ourselves. */
30545 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30546 for (i = 0; i < nelt; ++i)
30547 rmask[i] = mask;
30548 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30549 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30551 arm_expand_vec_perm_1 (target, op0, op1, sel);
30554 /* Generate or test for an insn that supports a constant permutation. */
30556 /* Recognize patterns for the VUZP insns. */
30558 static bool
30559 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30561 unsigned int i, odd, mask, nelt = d->nelt;
30562 rtx out0, out1, in0, in1, x;
30563 rtx (*gen)(rtx, rtx, rtx, rtx);
30565 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30566 return false;
30568 /* Note that these are little-endian tests. Adjust for big-endian later. */
30569 if (d->perm[0] == 0)
30570 odd = 0;
30571 else if (d->perm[0] == 1)
30572 odd = 1;
30573 else
30574 return false;
30575 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30577 for (i = 0; i < nelt; i++)
30579 unsigned elt = (i * 2 + odd) & mask;
30580 if (d->perm[i] != elt)
30581 return false;
30584 /* Success! */
30585 if (d->testing_p)
30586 return true;
30588 switch (d->vmode)
30590 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30591 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30592 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30593 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30594 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30595 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30596 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30597 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30598 default:
30599 gcc_unreachable ();
30602 in0 = d->op0;
30603 in1 = d->op1;
30604 if (BYTES_BIG_ENDIAN)
30606 x = in0, in0 = in1, in1 = x;
30607 odd = !odd;
30610 out0 = d->target;
30611 out1 = gen_reg_rtx (d->vmode);
30612 if (odd)
30613 x = out0, out0 = out1, out1 = x;
30615 emit_insn (gen (out0, in0, in1, out1));
30616 return true;
30619 /* Recognize patterns for the VZIP insns. */
30621 static bool
30622 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30624 unsigned int i, high, mask, nelt = d->nelt;
30625 rtx out0, out1, in0, in1, x;
30626 rtx (*gen)(rtx, rtx, rtx, rtx);
30628 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30629 return false;
30631 /* Note that these are little-endian tests. Adjust for big-endian later. */
30632 high = nelt / 2;
30633 if (d->perm[0] == high)
30635 else if (d->perm[0] == 0)
30636 high = 0;
30637 else
30638 return false;
30639 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30641 for (i = 0; i < nelt / 2; i++)
30643 unsigned elt = (i + high) & mask;
30644 if (d->perm[i * 2] != elt)
30645 return false;
30646 elt = (elt + nelt) & mask;
30647 if (d->perm[i * 2 + 1] != elt)
30648 return false;
30651 /* Success! */
30652 if (d->testing_p)
30653 return true;
30655 switch (d->vmode)
30657 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30658 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30659 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30660 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30661 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30662 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30663 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30664 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30665 default:
30666 gcc_unreachable ();
30669 in0 = d->op0;
30670 in1 = d->op1;
30671 if (BYTES_BIG_ENDIAN)
30673 x = in0, in0 = in1, in1 = x;
30674 high = !high;
30677 out0 = d->target;
30678 out1 = gen_reg_rtx (d->vmode);
30679 if (high)
30680 x = out0, out0 = out1, out1 = x;
30682 emit_insn (gen (out0, in0, in1, out1));
30683 return true;
30686 /* Recognize patterns for the VREV insns. */
30688 static bool
30689 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30691 unsigned int i, j, diff, nelt = d->nelt;
30692 rtx (*gen)(rtx, rtx, rtx);
30694 if (!d->one_vector_p)
30695 return false;
30697 diff = d->perm[0];
30698 switch (diff)
30700 case 7:
30701 switch (d->vmode)
30703 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30704 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30705 default:
30706 return false;
30708 break;
30709 case 3:
30710 switch (d->vmode)
30712 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30713 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30714 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30715 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30716 default:
30717 return false;
30719 break;
30720 case 1:
30721 switch (d->vmode)
30723 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30724 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30725 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30726 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30727 case V4SImode: gen = gen_neon_vrev64v4si; break;
30728 case V2SImode: gen = gen_neon_vrev64v2si; break;
30729 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30730 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30731 default:
30732 return false;
30734 break;
30735 default:
30736 return false;
30739 for (i = 0; i < nelt ; i += diff + 1)
30740 for (j = 0; j <= diff; j += 1)
30742 /* This is guaranteed to be true as the value of diff
30743 is 7, 3 or 1 and we should have enough elements in the
30744 queue to generate this. Getting a vector mask with a
30745 value of diff other than these values implies that
30746 something is wrong by the time we get here. */
30747 gcc_assert (i + j < nelt);
30748 if (d->perm[i + j] != i + diff - j)
30749 return false;
30752 /* Success! */
30753 if (d->testing_p)
30754 return true;
30756 /* ??? The third operand is an artifact of the builtin infrastructure
30757 and is ignored by the actual instruction. */
30758 emit_insn (gen (d->target, d->op0, const0_rtx));
30759 return true;
30762 /* Recognize patterns for the VTRN insns. */
30764 static bool
30765 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30767 unsigned int i, odd, mask, nelt = d->nelt;
30768 rtx out0, out1, in0, in1, x;
30769 rtx (*gen)(rtx, rtx, rtx, rtx);
30771 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30772 return false;
30774 /* Note that these are little-endian tests. Adjust for big-endian later. */
30775 if (d->perm[0] == 0)
30776 odd = 0;
30777 else if (d->perm[0] == 1)
30778 odd = 1;
30779 else
30780 return false;
30781 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30783 for (i = 0; i < nelt; i += 2)
30785 if (d->perm[i] != i + odd)
30786 return false;
30787 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30788 return false;
30791 /* Success! */
30792 if (d->testing_p)
30793 return true;
30795 switch (d->vmode)
30797 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30798 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30799 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30800 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30801 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30802 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30803 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30804 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30805 default:
30806 gcc_unreachable ();
30809 in0 = d->op0;
30810 in1 = d->op1;
30811 if (BYTES_BIG_ENDIAN)
30813 x = in0, in0 = in1, in1 = x;
30814 odd = !odd;
30817 out0 = d->target;
30818 out1 = gen_reg_rtx (d->vmode);
30819 if (odd)
30820 x = out0, out0 = out1, out1 = x;
30822 emit_insn (gen (out0, in0, in1, out1));
30823 return true;
30826 /* Recognize patterns for the VEXT insns. */
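/* For example, for V8QImode a selector of { 3, 4, 5, 6, 7, 8, 9, 10 }
   takes a run of consecutive elements starting at index 3 and crossing
   into the second operand, which maps to a vext.8 with #3.  */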
30828 static bool
30829 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30831 unsigned int i, nelt = d->nelt;
30832 rtx (*gen) (rtx, rtx, rtx, rtx);
30833 rtx offset;
30835 unsigned int location;
30837 unsigned int next = d->perm[0] + 1;
30839 /* TODO: Handle GCC's numbering of elements for big-endian. */
30840 if (BYTES_BIG_ENDIAN)
30841 return false;
30843 /* Check if the extracted indexes are increasing by one. */
30844 for (i = 1; i < nelt; next++, i++)
30846 /* If we hit the most significant element of the 2nd vector in
30847 the previous iteration, no need to test further. */
30848 if (next == 2 * nelt)
30849 return false;
30851 /* If we are operating on only one vector, it could be a
30852 rotation. If there are only two elements of size < 64, let
30853 arm_evpc_neon_vrev catch it. */
30854 if (d->one_vector_p && (next == nelt))
30856 if ((nelt == 2) && (d->vmode != V2DImode))
30857 return false;
30858 else
30859 next = 0;
30862 if (d->perm[i] != next)
30863 return false;
30866 location = d->perm[0];
30868 switch (d->vmode)
30870 case V16QImode: gen = gen_neon_vextv16qi; break;
30871 case V8QImode: gen = gen_neon_vextv8qi; break;
30872 case V4HImode: gen = gen_neon_vextv4hi; break;
30873 case V8HImode: gen = gen_neon_vextv8hi; break;
30874 case V2SImode: gen = gen_neon_vextv2si; break;
30875 case V4SImode: gen = gen_neon_vextv4si; break;
30876 case V2SFmode: gen = gen_neon_vextv2sf; break;
30877 case V4SFmode: gen = gen_neon_vextv4sf; break;
30878 case V2DImode: gen = gen_neon_vextv2di; break;
30879 default:
30880 return false;
30883 /* Success! */
30884 if (d->testing_p)
30885 return true;
30887 offset = GEN_INT (location);
30888 emit_insn (gen (d->target, d->op0, d->op1, offset));
30889 return true;
30892 /* The NEON VTBL instruction is a fully variable permutation that's even
30893 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30894 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30895 can do slightly better by expanding this as a constant where we don't
30896 have to apply a mask. */
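/* By the time we get here the selector has already been reduced modulo
   2 * nelt (see arm_expand_vec_perm_const), so every index is in range
   and VTBL's zero-for-out-of-range behaviour can never trigger; no
   extra masking instructions are needed.  */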
30898 static bool
30899 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30901 rtx rperm[MAX_VECT_LEN], sel;
30902 enum machine_mode vmode = d->vmode;
30903 unsigned int i, nelt = d->nelt;
30905 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30906 numbering of elements for big-endian, we must reverse the order. */
30907 if (BYTES_BIG_ENDIAN)
30908 return false;
30910 if (d->testing_p)
30911 return true;
30913 /* Generic code will try constant permutation twice. Once with the
30914 original mode and again with the elements lowered to QImode.
30915 So wait and don't do the selector expansion ourselves. */
30916 if (vmode != V8QImode && vmode != V16QImode)
30917 return false;
30919 for (i = 0; i < nelt; ++i)
30920 rperm[i] = GEN_INT (d->perm[i]);
30921 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30922 sel = force_reg (vmode, sel);
30924 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30925 return true;
30928 static bool
30929 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30931 /* Check if the input mask matches vext before reordering the
30932 operands. */
30933 if (TARGET_NEON)
30934 if (arm_evpc_neon_vext (d))
30935 return true;
30937 /* The pattern matching functions above are written to look for a small
30938 number to begin the sequence (0, 1, N/2). If we begin with an index
30939 from the second operand, we can swap the operands. */
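/* For example, with V4SImode a selector of { 4, 5, 0, 1 } becomes
   { 0, 1, 4, 5 } once the operands are swapped.  */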
30940 if (d->perm[0] >= d->nelt)
30942 unsigned i, nelt = d->nelt;
30943 rtx x;
30945 for (i = 0; i < nelt; ++i)
30946 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30948 x = d->op0;
30949 d->op0 = d->op1;
30950 d->op1 = x;
30953 if (TARGET_NEON)
30955 if (arm_evpc_neon_vuzp (d))
30956 return true;
30957 if (arm_evpc_neon_vzip (d))
30958 return true;
30959 if (arm_evpc_neon_vrev (d))
30960 return true;
30961 if (arm_evpc_neon_vtrn (d))
30962 return true;
30963 return arm_evpc_neon_vtbl (d);
30965 return false;
30968 /* Expand a vec_perm_const pattern. */
30970 bool
30971 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30973 struct expand_vec_perm_d d;
30974 int i, nelt, which;
30976 d.target = target;
30977 d.op0 = op0;
30978 d.op1 = op1;
30980 d.vmode = GET_MODE (target);
30981 gcc_assert (VECTOR_MODE_P (d.vmode));
30982 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30983 d.testing_p = false;
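/* Reduce each selector element modulo 2 * nelt and record in WHICH
   whether the first input (bit 0), the second input (bit 1), or both
   are referenced.  */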
30985 for (i = which = 0; i < nelt; ++i)
30987 rtx e = XVECEXP (sel, 0, i);
30988 int ei = INTVAL (e) & (2 * nelt - 1);
30989 which |= (ei < nelt ? 1 : 2);
30990 d.perm[i] = ei;
30993 switch (which)
30995 default:
30996 gcc_unreachable();
30998 case 3:
30999 d.one_vector_p = false;
31000 if (!rtx_equal_p (op0, op1))
31001 break;
31003 /* The elements of PERM do not suggest that only the first operand
31004 is used, but both operands are identical. Allow easier matching
31005 of the permutation by folding the permutation into the single
31006 input vector. */
31007 /* FALLTHRU */
31008 case 2:
31009 for (i = 0; i < nelt; ++i)
31010 d.perm[i] &= nelt - 1;
31011 d.op0 = op1;
31012 d.one_vector_p = true;
31013 break;
31015 case 1:
31016 d.op1 = op0;
31017 d.one_vector_p = true;
31018 break;
31021 return arm_expand_vec_perm_const_1 (&d);
31024 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31026 static bool
31027 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31028 const unsigned char *sel)
31030 struct expand_vec_perm_d d;
31031 unsigned int i, nelt, which;
31032 bool ret;
31034 d.vmode = vmode;
31035 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31036 d.testing_p = true;
31037 memcpy (d.perm, sel, nelt);
31039 /* Categorize the set of elements in the selector. */
31040 for (i = which = 0; i < nelt; ++i)
31042 unsigned char e = d.perm[i];
31043 gcc_assert (e < 2 * nelt);
31044 which |= (e < nelt ? 1 : 2);
31047 /* If all elements come from the second vector, fold them onto the first. */
31048 if (which == 2)
31049 for (i = 0; i < nelt; ++i)
31050 d.perm[i] -= nelt;
31052 /* Check whether the mask can be applied to the vector type. */
31053 d.one_vector_p = (which != 3);
31055 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31056 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31057 if (!d.one_vector_p)
31058 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31060 start_sequence ();
31061 ret = arm_expand_vec_perm_const_1 (&d);
31062 end_sequence ();
31064 return ret;
31067 bool
31068 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31070 /* If we are soft float and either we have ldrd or the mode
31071 fits in a single word, then all auto increment forms are ok. */
31072 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31073 return true;
31075 switch (code)
31077 /* Post-increment and pre-decrement are supported for all
31078 instruction forms except for vector modes, where only post-increment is available. */
31079 case ARM_POST_INC:
31080 case ARM_PRE_DEC:
31081 if (VECTOR_MODE_P (mode))
31083 if (code != ARM_PRE_DEC)
31084 return true;
31085 else
31086 return false;
31089 return true;
31091 case ARM_POST_DEC:
31092 case ARM_PRE_INC:
31093 /* Without LDRD, and with a mode size greater than the
31094 word size, there is no point in auto-incrementing
31095 because ldm and stm do not have these forms. */
31096 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31097 return false;
31099 /* Vector and floating point modes do not support
31100 these auto increment forms. */
31101 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31102 return false;
31104 return true;
31106 default:
31107 return false;
31111 return false;
31114 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31115 on ARM, since we know that shifts by negative amounts are no-ops.
31116 Additionally, the default expansion code is not available or suitable
31117 for post-reload insn splits (this can occur when the register allocator
31118 chooses not to do a shift in NEON).
31120 This function is used in both initial expand and post-reload splits, and
31121 handles all kinds of 64-bit shifts.
31123 Input requirements:
31124 - It is safe for the input and output to be the same register, but
31125 early-clobber rules apply for the shift amount and scratch registers.
31126 - Shift by register requires both scratch registers. In all other cases
31127 the scratch registers may be NULL.
31128 - Ashiftrt by a register also clobbers the CC register. */
31129 void
31130 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31131 rtx amount, rtx scratch1, rtx scratch2)
31133 rtx out_high = gen_highpart (SImode, out);
31134 rtx out_low = gen_lowpart (SImode, out);
31135 rtx in_high = gen_highpart (SImode, in);
31136 rtx in_low = gen_lowpart (SImode, in);
31138 /* Terminology:
31139 in = the register pair containing the input value.
31140 out = the destination register pair.
31141 up = the high- or low-part of each pair.
31142 down = the opposite part to "up".
31143 In a shift, we can consider bits to shift from "up"-stream to
31144 "down"-stream, so in a left-shift "up" is the low-part and "down"
31145 is the high-part of each register pair. */
31147 rtx out_up = code == ASHIFT ? out_low : out_high;
31148 rtx out_down = code == ASHIFT ? out_high : out_low;
31149 rtx in_up = code == ASHIFT ? in_low : in_high;
31150 rtx in_down = code == ASHIFT ? in_high : in_low;
31152 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31153 gcc_assert (out
31154 && (REG_P (out) || GET_CODE (out) == SUBREG)
31155 && GET_MODE (out) == DImode);
31156 gcc_assert (in
31157 && (REG_P (in) || GET_CODE (in) == SUBREG)
31158 && GET_MODE (in) == DImode);
31159 gcc_assert (amount
31160 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31161 && GET_MODE (amount) == SImode)
31162 || CONST_INT_P (amount)));
31163 gcc_assert (scratch1 == NULL
31164 || (GET_CODE (scratch1) == SCRATCH)
31165 || (GET_MODE (scratch1) == SImode
31166 && REG_P (scratch1)));
31167 gcc_assert (scratch2 == NULL
31168 || (GET_CODE (scratch2) == SCRATCH)
31169 || (GET_MODE (scratch2) == SImode
31170 && REG_P (scratch2)));
31171 gcc_assert (!REG_P (out) || !REG_P (amount)
31172 || !HARD_REGISTER_P (out)
31173 || (REGNO (out) != REGNO (amount)
31174 && REGNO (out) + 1 != REGNO (amount)));
31176 /* Macros to make following code more readable. */
31177 #define SUB_32(DEST,SRC) \
31178 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31179 #define RSB_32(DEST,SRC) \
31180 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31181 #define SUB_S_32(DEST,SRC) \
31182 gen_addsi3_compare0 ((DEST), (SRC), \
31183 GEN_INT (-32))
31184 #define SET(DEST,SRC) \
31185 gen_rtx_SET (SImode, (DEST), (SRC))
31186 #define SHIFT(CODE,SRC,AMOUNT) \
31187 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31188 #define LSHIFT(CODE,SRC,AMOUNT) \
31189 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31190 SImode, (SRC), (AMOUNT))
31191 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31192 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31193 SImode, (SRC), (AMOUNT))
31194 #define ORR(A,B) \
31195 gen_rtx_IOR (SImode, (A), (B))
31196 #define BRANCH(COND,LABEL) \
31197 gen_arm_cond_branch ((LABEL), \
31198 gen_rtx_ ## COND (CCmode, cc_reg, \
31199 const0_rtx), \
31200 cc_reg)
31202 /* Shifts by register and shifts by constant are handled separately. */
31203 if (CONST_INT_P (amount))
31205 /* We have a shift-by-constant. */
31207 /* First, handle out-of-range shift amounts.
31208 In both cases we try to match the result that an ARM instruction in a
31209 shift-by-register would give. This helps reduce execution
31210 differences between optimization levels, but it won't stop other
31211 parts of the compiler doing different things. This is "undefined
31212 behaviour", in any case. */
31213 if (INTVAL (amount) <= 0)
31214 emit_insn (gen_movdi (out, in));
31215 else if (INTVAL (amount) >= 64)
31217 if (code == ASHIFTRT)
31219 rtx const31_rtx = GEN_INT (31);
31220 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31221 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31223 else
31224 emit_insn (gen_movdi (out, const0_rtx));
31227 /* Now handle valid shifts. */
31228 else if (INTVAL (amount) < 32)
31230 /* Shifts by a constant less than 32. */
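/* For example, a logical right shift by 10 becomes:
   out_low = (in_low >> 10) | (in_high << 22);
   out_high = in_high >> 10;  */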
31231 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31233 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31234 emit_insn (SET (out_down,
31235 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31236 out_down)));
31237 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31239 else
31241 /* Shifts by a constant greater than 31. */
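/* For example, an arithmetic right shift by 40 becomes:
   out_low = (signed) in_high >> 8;
   out_high = in_high >> 31;  (sign fill)  */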
31242 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31244 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31245 if (code == ASHIFTRT)
31246 emit_insn (gen_ashrsi3 (out_up, in_up,
31247 GEN_INT (31)));
31248 else
31249 emit_insn (SET (out_up, const0_rtx));
31252 else
31254 /* We have a shift-by-register. */
31255 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31257 /* This alternative requires the scratch registers. */
31258 gcc_assert (scratch1 && REG_P (scratch1));
31259 gcc_assert (scratch2 && REG_P (scratch2));
31261 /* We will need the values "amount-32" and "32-amount" later.
31262 Swapping them around now allows the later code to be more general. */
31263 switch (code)
31265 case ASHIFT:
31266 emit_insn (SUB_32 (scratch1, amount));
31267 emit_insn (RSB_32 (scratch2, amount));
31268 break;
31269 case ASHIFTRT:
31270 emit_insn (RSB_32 (scratch1, amount));
31271 /* Also set CC from amount - 32, for the amount >= 32 test below. */
31272 emit_insn (SUB_S_32 (scratch2, amount));
31273 break;
31274 case LSHIFTRT:
31275 emit_insn (RSB_32 (scratch1, amount));
31276 emit_insn (SUB_32 (scratch2, amount));
31277 break;
31278 default:
31279 gcc_unreachable ();
31282 /* Emit code like this:
31284 arithmetic-left:
31285 out_down = in_down << amount;
31286 out_down = (in_up << (amount - 32)) | out_down;
31287 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31288 out_up = in_up << amount;
31290 arithmetic-right:
31291 out_down = in_down >> amount;
31292 out_down = (in_up << (32 - amount)) | out_down;
31293 if (amount >= 32)
31294 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31295 out_up = in_up >> amount;
31297 logical-right:
31298 out_down = in_down >> amount;
31299 out_down = (in_up << (32 - amount)) | out_down;
31300 if (amount >= 32)
31301 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31302 out_up = in_up >> amount;
31304 The ARM and Thumb2 variants are the same but implemented slightly
31305 differently. If this were only called during expand we could just
31306 use the Thumb2 case and let combine do the right thing, but this
31307 can also be called from post-reload splitters. */
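/* The out-of-range terms above rely on the ARM register-controlled
   shift semantics: only the bottom byte of the amount is used, LSL and
   LSR by 32 or more yield zero, and ASR by 32 or more yields the sign
   bit replicated, so e.g. "in_up << (amount - 32)" contributes nothing
   when amount < 32.  */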
31309 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31311 if (!TARGET_THUMB2)
31313 /* Emit code for ARM mode. */
31314 emit_insn (SET (out_down,
31315 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31316 if (code == ASHIFTRT)
31318 rtx done_label = gen_label_rtx ();
31319 emit_jump_insn (BRANCH (LT, done_label));
31320 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31321 out_down)));
31322 emit_label (done_label);
31324 else
31325 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31326 out_down)));
31328 else
31330 /* Emit code for Thumb2 mode.
31331 Thumb2 can't do shift and or in one insn. */
31332 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31333 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31335 if (code == ASHIFTRT)
31337 rtx done_label = gen_label_rtx ();
31338 emit_jump_insn (BRANCH (LT, done_label));
31339 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31340 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31341 emit_label (done_label);
31343 else
31345 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31346 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31350 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31353 #undef SUB_32
31354 #undef RSB_32
31355 #undef SUB_S_32
31356 #undef SET
31357 #undef SHIFT
31358 #undef LSHIFT
31359 #undef REV_LSHIFT
31360 #undef ORR
31361 #undef BRANCH
31365 /* Returns true if COMPARISON is a valid comparison operation, and puts
31366 the operands into a form that is valid. */
31367 bool
31368 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31370 enum rtx_code code = GET_CODE (*comparison);
31371 int code_int;
31372 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31373 ? GET_MODE (*op2) : GET_MODE (*op1);
31375 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31377 if (code == UNEQ || code == LTGT)
31378 return false;
31380 code_int = (int)code;
31381 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31382 PUT_CODE (*comparison, (enum rtx_code)code_int);
31384 switch (mode)
31386 case SImode:
31387 if (!arm_add_operand (*op1, mode))
31388 *op1 = force_reg (mode, *op1);
31389 if (!arm_add_operand (*op2, mode))
31390 *op2 = force_reg (mode, *op2);
31391 return true;
31393 case DImode:
31394 if (!cmpdi_operand (*op1, mode))
31395 *op1 = force_reg (mode, *op1);
31396 if (!cmpdi_operand (*op2, mode))
31397 *op2 = force_reg (mode, *op2);
31398 return true;
31400 case SFmode:
31401 case DFmode:
31402 if (!arm_float_compare_operand (*op1, mode))
31403 *op1 = force_reg (mode, *op1);
31404 if (!arm_float_compare_operand (*op2, mode))
31405 *op2 = force_reg (mode, *op2);
31406 return true;
31407 default:
31408 break;
31411 return false;
31415 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
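/* 1 << 29 is 0x20000000, the shadow memory offset used for 32-bit ARM
   targets.  */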
31417 static unsigned HOST_WIDE_INT
31418 arm_asan_shadow_offset (void)
31420 return (unsigned HOST_WIDE_INT) 1 << 29;
31424 /* This is a temporary fix for PR60655. Ideally we need
31425 to handle most of these cases in the generic part but
31426 currently we reject minus (..) (sym_ref). We try to
31427 ameliorate the case with minus (sym_ref1) (sym_ref2)
31428 where they are in the same section. */
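/* For example, the difference of two variables placed in the same
   section, or of two labels in the same function, can still be emitted
   as a debug constant; a difference that crosses sections cannot.  */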
31430 static bool
31431 arm_const_not_ok_for_debug_p (rtx p)
31433 tree decl_op0 = NULL;
31434 tree decl_op1 = NULL;
31436 if (GET_CODE (p) == MINUS)
31438 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31440 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31441 if (decl_op1
31442 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31443 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31445 if ((TREE_CODE (decl_op1) == VAR_DECL
31446 || TREE_CODE (decl_op1) == CONST_DECL)
31447 && (TREE_CODE (decl_op0) == VAR_DECL
31448 || TREE_CODE (decl_op0) == CONST_DECL))
31449 return (get_variable_section (decl_op1, false)
31450 != get_variable_section (decl_op0, false));
31452 if (TREE_CODE (decl_op1) == LABEL_DECL
31453 && TREE_CODE (decl_op0) == LABEL_DECL)
31454 return (DECL_CONTEXT (decl_op1)
31455 != DECL_CONTEXT (decl_op0));
31458 return true;
31462 return false;
31465 static void
31466 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
31468 const unsigned ARM_FE_INVALID = 1;
31469 const unsigned ARM_FE_DIVBYZERO = 2;
31470 const unsigned ARM_FE_OVERFLOW = 4;
31471 const unsigned ARM_FE_UNDERFLOW = 8;
31472 const unsigned ARM_FE_INEXACT = 16;
31473 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
31474 | ARM_FE_DIVBYZERO
31475 | ARM_FE_OVERFLOW
31476 | ARM_FE_UNDERFLOW
31477 | ARM_FE_INEXACT);
31478 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
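/* These values follow the VFP FPSCR layout: the cumulative exception
   flag bits occupy bits 0-4 and the corresponding trap-enable bits sit
   ARM_FE_EXCEPT_SHIFT (8) bits higher, so the mask built below clears
   both the sticky flags and the trap enables.  */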
31479 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
31480 tree new_fenv_var, reload_fenv, restore_fnenv;
31481 tree update_call, atomic_feraiseexcept, hold_fnclex;
31483 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
31484 return default_atomic_assign_expand_fenv (hold, clear, update);
31486 /* Generate the equivalent of:
31487 unsigned int fenv_var;
31488 fenv_var = __builtin_arm_get_fpscr ();
31490 unsigned int masked_fenv;
31491 masked_fenv = fenv_var & mask;
31493 __builtin_arm_set_fpscr (masked_fenv); */
31495 fenv_var = create_tmp_var (unsigned_type_node, NULL);
31496 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
31497 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
31498 mask = build_int_cst (unsigned_type_node,
31499 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
31500 | ARM_FE_ALL_EXCEPT));
31501 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
31502 fenv_var, build_call_expr (get_fpscr, 0));
31503 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
31504 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
31505 *hold = build2 (COMPOUND_EXPR, void_type_node,
31506 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
31507 hold_fnclex);
31509 /* Store the value of masked_fenv to clear the exceptions:
31510 __builtin_arm_set_fpscr (masked_fenv); */
31512 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
31514 /* Generate the equivalent of:
31515 unsigned int new_fenv_var;
31516 new_fenv_var = __builtin_arm_get_fpscr ();
31518 __builtin_arm_set_fpscr (fenv_var);
31520 __atomic_feraiseexcept (new_fenv_var); */
31522 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
31523 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
31524 build_call_expr (get_fpscr, 0));
31525 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
31526 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
31527 update_call = build_call_expr (atomic_feraiseexcept, 1,
31528 fold_convert (integer_type_node, new_fenv_var));
31529 *update = build2 (COMPOUND_EXPR, void_type_node,
31530 build2 (COMPOUND_EXPR, void_type_node,
31531 reload_fenv, restore_fnenv), update_call);
31534 /* Return TRUE if X is a reference to a value in a constant pool. */
31535 extern bool
31536 arm_is_constant_pool_ref (rtx x)
31538 return (MEM_P (x)
31539 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
31540 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
31543 #include "gt-arm.h"