1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "reload.h"
39 #include "function.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "debug.h"
51 #include "langhooks.h"
52 #include "df.h"
53 #include "intl.h"
54 #include "libfuncs.h"
55 #include "params.h"
56 #include "opts.h"
57 #include "dumpfile.h"
59 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix;
63 void (*arm_lang_output_object_attributes_hook)(void);
65 struct four_ints
67 int i[4];
70 /* Forward function declarations. */
71 static bool arm_lra_p (void);
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
136 static int optimal_immediate_sequence (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence);
139 static int optimal_immediate_sequence_1 (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence,
142 int i);
143 static int arm_get_strip_length (int);
144 static bool arm_function_ok_for_sibcall (tree, tree);
145 static enum machine_mode arm_promote_function_mode (const_tree,
146 enum machine_mode, int *,
147 const_tree, int);
148 static bool arm_return_in_memory (const_tree, const_tree);
149 static rtx arm_function_value (const_tree, const_tree, bool);
150 static rtx arm_libcall_value_1 (enum machine_mode);
151 static rtx arm_libcall_value (enum machine_mode, const_rtx);
152 static bool arm_function_value_regno_p (const unsigned int);
153 static void arm_internal_label (FILE *, const char *, unsigned long);
154 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
155 tree);
156 static bool arm_have_conditional_execution (void);
157 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
158 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
159 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
160 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
161 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
166 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
167 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
168 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
169 static void arm_init_builtins (void);
170 static void arm_init_iwmmxt_builtins (void);
171 static rtx safe_vector_operand (rtx, enum machine_mode);
172 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
173 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
174 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
175 static tree arm_builtin_decl (unsigned, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx emit_set_insn (rtx, rtx);
178 static rtx emit_multi_reg_push (unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
180 tree, bool);
181 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
186 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
187 const_tree);
188 static rtx aapcs_libcall_value (enum machine_mode);
189 static int aapcs_select_return_coproc (const_tree, const_tree);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
193 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 #endif
195 #ifndef ARM_PE
196 static void arm_encode_section_info (tree, rtx, int);
197 #endif
199 static void arm_file_end (void);
200 static void arm_file_start (void);
202 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 enum machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 static void arm_asm_init_sections (void);
217 #endif
218 static rtx arm_dwarf_register_span (rtx);
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
235 static bool arm_cannot_copy_insn_p (rtx);
236 static bool arm_tls_symbol_p (rtx x);
237 static int arm_issue_rate (void);
238 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
239 static bool arm_output_addr_const_extra (FILE *, rtx);
240 static bool arm_allocate_stack_slots_for_args (void);
241 static bool arm_warn_func_return (tree);
242 static const char *arm_invalid_parameter_type (const_tree t);
243 static const char *arm_invalid_return_type (const_tree t);
244 static tree arm_promoted_type (const_tree t);
245 static tree arm_convert_to_type (tree type, tree expr);
246 static bool arm_scalar_mode_supported_p (enum machine_mode);
247 static bool arm_frame_pointer_required (void);
248 static bool arm_can_eliminate (const int, const int);
249 static void arm_asm_trampoline_template (FILE *);
250 static void arm_trampoline_init (rtx, tree, rtx);
251 static rtx arm_trampoline_adjust_address (rtx);
252 static rtx arm_pic_static_addr (rtx orig, rtx reg);
253 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool arm_array_mode_supported_p (enum machine_mode,
257 unsigned HOST_WIDE_INT);
258 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
259 static bool arm_class_likely_spilled_p (reg_class_t);
260 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
261 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
262 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
263 const_tree type,
264 int misalignment,
265 bool is_packed);
266 static void arm_conditional_register_usage (void);
267 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
268 static unsigned int arm_autovectorize_vector_sizes (void);
269 static int arm_default_branch_cost (bool, bool);
270 static int arm_cortex_a5_branch_cost (bool, bool);
272 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
273 const unsigned char *sel);
275 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
276 tree vectype,
277 int misalign ATTRIBUTE_UNUSED);
278 static unsigned arm_add_stmt_cost (void *data, int count,
279 enum vect_cost_for_stmt kind,
280 struct _stmt_vec_info *stmt_info,
281 int misalign,
282 enum vect_cost_model_location where);
284 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
285 bool op0_preserve_value);
286 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
288 /* Table of machine attributes. */
289 static const struct attribute_spec arm_attribute_table[] =
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
292 affects_type_identity } */
293 /* Function calls made to this symbol must be done indirectly, because
294 it may lie outside of the 26 bit addressing range of a normal function
295 call. */
296 { "long_call", 0, 0, false, true, true, NULL, false },
297 /* Whereas these functions are always known to reside within the 26 bit
298 addressing range. */
299 { "short_call", 0, 0, false, true, true, NULL, false },
300 /* Specify the procedure call conventions for a function. */
301 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
302 false },
303 /* Interrupt Service Routines have special prologue and epilogue requirements. */
304 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
305 false },
306 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
307 false },
308 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #ifdef ARM_PE
311 /* ARM/PE has three new attributes:
312 interfacearm - ?
313 dllexport - for exporting a function/variable that will live in a dll
314 dllimport - for importing a function/variable from a dll
316 Microsoft allows multiple declspecs in one __declspec, separating
317 them with spaces. We do NOT support this. Instead, use __declspec
318 multiple times.
320 { "dllimport", 0, 0, true, false, false, NULL, false },
321 { "dllexport", 0, 0, true, false, false, NULL, false },
322 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
323 false },
324 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
325 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
326 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
327 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
328 false },
329 #endif
330 { NULL, 0, 0, false, false, false, NULL, false }
333 /* Initialize the GCC target structure. */
334 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
335 #undef TARGET_MERGE_DECL_ATTRIBUTES
336 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
337 #endif
339 #undef TARGET_LEGITIMIZE_ADDRESS
340 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
342 #undef TARGET_LRA_P
343 #define TARGET_LRA_P arm_lra_p
345 #undef TARGET_ATTRIBUTE_TABLE
346 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
348 #undef TARGET_ASM_FILE_START
349 #define TARGET_ASM_FILE_START arm_file_start
350 #undef TARGET_ASM_FILE_END
351 #define TARGET_ASM_FILE_END arm_file_end
353 #undef TARGET_ASM_ALIGNED_SI_OP
354 #define TARGET_ASM_ALIGNED_SI_OP NULL
355 #undef TARGET_ASM_INTEGER
356 #define TARGET_ASM_INTEGER arm_assemble_integer
358 #undef TARGET_PRINT_OPERAND
359 #define TARGET_PRINT_OPERAND arm_print_operand
360 #undef TARGET_PRINT_OPERAND_ADDRESS
361 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
362 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
363 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
368 #undef TARGET_ASM_FUNCTION_PROLOGUE
369 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
371 #undef TARGET_ASM_FUNCTION_EPILOGUE
372 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE arm_option_override
377 #undef TARGET_COMP_TYPE_ATTRIBUTES
378 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
380 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
381 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
383 #undef TARGET_SCHED_ADJUST_COST
384 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
386 #undef TARGET_SCHED_REORDER
387 #define TARGET_SCHED_REORDER arm_sched_reorder
389 #undef TARGET_REGISTER_MOVE_COST
390 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
392 #undef TARGET_MEMORY_MOVE_COST
393 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
395 #undef TARGET_ENCODE_SECTION_INFO
396 #ifdef ARM_PE
397 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
398 #else
399 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
400 #endif
402 #undef TARGET_STRIP_NAME_ENCODING
403 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
405 #undef TARGET_ASM_INTERNAL_LABEL
406 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
408 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
409 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
411 #undef TARGET_FUNCTION_VALUE
412 #define TARGET_FUNCTION_VALUE arm_function_value
414 #undef TARGET_LIBCALL_VALUE
415 #define TARGET_LIBCALL_VALUE arm_libcall_value
417 #undef TARGET_FUNCTION_VALUE_REGNO_P
418 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
420 #undef TARGET_ASM_OUTPUT_MI_THUNK
421 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
422 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
423 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
425 #undef TARGET_RTX_COSTS
426 #define TARGET_RTX_COSTS arm_rtx_costs
427 #undef TARGET_ADDRESS_COST
428 #define TARGET_ADDRESS_COST arm_address_cost
430 #undef TARGET_SHIFT_TRUNCATION_MASK
431 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
432 #undef TARGET_VECTOR_MODE_SUPPORTED_P
433 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
434 #undef TARGET_ARRAY_MODE_SUPPORTED_P
435 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
436 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
437 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
438 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
439 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
440 arm_autovectorize_vector_sizes
442 #undef TARGET_MACHINE_DEPENDENT_REORG
443 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
445 #undef TARGET_INIT_BUILTINS
446 #define TARGET_INIT_BUILTINS arm_init_builtins
447 #undef TARGET_EXPAND_BUILTIN
448 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
449 #undef TARGET_BUILTIN_DECL
450 #define TARGET_BUILTIN_DECL arm_builtin_decl
452 #undef TARGET_INIT_LIBFUNCS
453 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
455 #undef TARGET_PROMOTE_FUNCTION_MODE
456 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
459 #undef TARGET_PASS_BY_REFERENCE
460 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
461 #undef TARGET_ARG_PARTIAL_BYTES
462 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
463 #undef TARGET_FUNCTION_ARG
464 #define TARGET_FUNCTION_ARG arm_function_arg
465 #undef TARGET_FUNCTION_ARG_ADVANCE
466 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
467 #undef TARGET_FUNCTION_ARG_BOUNDARY
468 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
470 #undef TARGET_SETUP_INCOMING_VARARGS
471 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
473 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
474 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
476 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
477 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
478 #undef TARGET_TRAMPOLINE_INIT
479 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
480 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
481 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
483 #undef TARGET_WARN_FUNC_RETURN
484 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
486 #undef TARGET_DEFAULT_SHORT_ENUMS
487 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
489 #undef TARGET_ALIGN_ANON_BITFIELD
490 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
492 #undef TARGET_NARROW_VOLATILE_BITFIELD
493 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
495 #undef TARGET_CXX_GUARD_TYPE
496 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
498 #undef TARGET_CXX_GUARD_MASK_BIT
499 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
501 #undef TARGET_CXX_GET_COOKIE_SIZE
502 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
504 #undef TARGET_CXX_COOKIE_HAS_SIZE
505 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
507 #undef TARGET_CXX_CDTOR_RETURNS_THIS
508 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
510 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
511 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
513 #undef TARGET_CXX_USE_AEABI_ATEXIT
514 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
516 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
517 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
518 arm_cxx_determine_class_data_visibility
520 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
521 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
523 #undef TARGET_RETURN_IN_MSB
524 #define TARGET_RETURN_IN_MSB arm_return_in_msb
526 #undef TARGET_RETURN_IN_MEMORY
527 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
529 #undef TARGET_MUST_PASS_IN_STACK
530 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
532 #if ARM_UNWIND_INFO
533 #undef TARGET_ASM_UNWIND_EMIT
534 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
536 /* EABI unwinding tables use a different format for the typeinfo tables. */
537 #undef TARGET_ASM_TTYPE
538 #define TARGET_ASM_TTYPE arm_output_ttype
540 #undef TARGET_ARM_EABI_UNWINDER
541 #define TARGET_ARM_EABI_UNWINDER true
543 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
544 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
546 #undef TARGET_ASM_INIT_SECTIONS
547 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
548 #endif /* ARM_UNWIND_INFO */
550 #undef TARGET_DWARF_REGISTER_SPAN
551 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
553 #undef TARGET_CANNOT_COPY_INSN_P
554 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
556 #ifdef HAVE_AS_TLS
557 #undef TARGET_HAVE_TLS
558 #define TARGET_HAVE_TLS true
559 #endif
561 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
562 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
564 #undef TARGET_LEGITIMATE_CONSTANT_P
565 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
567 #undef TARGET_CANNOT_FORCE_CONST_MEM
568 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
570 #undef TARGET_MAX_ANCHOR_OFFSET
571 #define TARGET_MAX_ANCHOR_OFFSET 4095
573 /* The minimum is set such that the total size of the block
574 for a particular anchor is -4088 + 1 + 4095 bytes, which is
575 divisible by eight, ensuring natural spacing of anchors. */
576 #undef TARGET_MIN_ANCHOR_OFFSET
577 #define TARGET_MIN_ANCHOR_OFFSET -4088
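/* That is, a single anchor can reach offsets in [-4088, +4095]; together
   with the anchored byte itself that is 4088 + 1 + 4095 = 8184 bytes,
   i.e. 1023 * 8, which gives the eight-byte spacing noted above.  */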
579 #undef TARGET_SCHED_ISSUE_RATE
580 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
582 #undef TARGET_MANGLE_TYPE
583 #define TARGET_MANGLE_TYPE arm_mangle_type
585 #undef TARGET_BUILD_BUILTIN_VA_LIST
586 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
587 #undef TARGET_EXPAND_BUILTIN_VA_START
588 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
589 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
590 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
592 #ifdef HAVE_AS_TLS
593 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
594 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
595 #endif
597 #undef TARGET_LEGITIMATE_ADDRESS_P
598 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
600 #undef TARGET_PREFERRED_RELOAD_CLASS
601 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
603 #undef TARGET_INVALID_PARAMETER_TYPE
604 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
606 #undef TARGET_INVALID_RETURN_TYPE
607 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
609 #undef TARGET_PROMOTED_TYPE
610 #define TARGET_PROMOTED_TYPE arm_promoted_type
612 #undef TARGET_CONVERT_TO_TYPE
613 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
615 #undef TARGET_SCALAR_MODE_SUPPORTED_P
616 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
618 #undef TARGET_FRAME_POINTER_REQUIRED
619 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
621 #undef TARGET_CAN_ELIMINATE
622 #define TARGET_CAN_ELIMINATE arm_can_eliminate
624 #undef TARGET_CONDITIONAL_REGISTER_USAGE
625 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
627 #undef TARGET_CLASS_LIKELY_SPILLED_P
628 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
630 #undef TARGET_VECTORIZE_BUILTINS
631 #define TARGET_VECTORIZE_BUILTINS
633 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
634 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
635 arm_builtin_vectorized_function
637 #undef TARGET_VECTOR_ALIGNMENT
638 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
640 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
641 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
642 arm_vector_alignment_reachable
644 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
645 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
646 arm_builtin_support_vector_misalignment
648 #undef TARGET_PREFERRED_RENAME_CLASS
649 #define TARGET_PREFERRED_RENAME_CLASS \
650 arm_preferred_rename_class
652 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
653 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
654 arm_vectorize_vec_perm_const_ok
656 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
657 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
658 arm_builtin_vectorization_cost
659 #undef TARGET_VECTORIZE_ADD_STMT_COST
660 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
662 #undef TARGET_CANONICALIZE_COMPARISON
663 #define TARGET_CANONICALIZE_COMPARISON \
664 arm_canonicalize_comparison
666 #undef TARGET_ASAN_SHADOW_OFFSET
667 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
669 #undef MAX_INSN_PER_IT_BLOCK
670 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
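/* arm_restrict_it corresponds to -mrestrict-it: on cores where the longer
   forms of IT blocks are deprecated (ARMv8-A, for example) an IT block is
   limited to a single instruction, otherwise up to four.  */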
673 struct gcc_target targetm = TARGET_INITIALIZER;
675 /* Obstack for minipool constant handling. */
676 static struct obstack minipool_obstack;
677 static char * minipool_startobj;
679 /* The maximum number of insns skipped which
680 will be conditionalised if possible. */
681 static int max_insns_skipped = 5;
683 extern FILE * asm_out_file;
685 /* True if we are currently building a constant table. */
686 int making_const_table;
688 /* The processor for which instructions should be scheduled. */
689 enum processor_type arm_tune = arm_none;
691 /* The current tuning set. */
692 const struct tune_params *current_tune;
694 /* Which floating point hardware to schedule for. */
695 int arm_fpu_attr;
697 /* Which floating point hardware to use. */
698 const struct arm_fpu_desc *arm_fpu_desc;
700 /* Used for Thumb call_via trampolines. */
701 rtx thumb_call_via_label[14];
702 static int thumb_call_reg_needed;
704 /* Bit values used to identify processor capabilities. */
705 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
706 #define FL_ARCH3M (1 << 1) /* Extended multiply */
707 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
708 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
709 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
710 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
711 #define FL_THUMB (1 << 6) /* Thumb aware */
712 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
713 #define FL_STRONG (1 << 8) /* StrongARM */
714 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
715 #define FL_XSCALE (1 << 10) /* XScale */
716 /* spare (1 << 11) */
717 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
718 media instructions. */
719 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
720 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
721 Note: ARM6 & 7 derivatives only. */
722 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
723 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
724 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
725 profile. */
726 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
727 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
728 #define FL_NEON (1 << 20) /* Neon instructions. */
729 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
730 architecture. */
731 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
732 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
733 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
735 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
736 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
738 /* Flags that only affect tuning, not available instructions. */
739 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
740 | FL_CO_PROC)
742 #define FL_FOR_ARCH2 FL_NOTM
743 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
744 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
745 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
746 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
747 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
748 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
749 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
750 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
751 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
752 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
753 #define FL_FOR_ARCH6J FL_FOR_ARCH6
754 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
755 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
756 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
757 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
758 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
759 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
760 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
761 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
762 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
763 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
764 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
765 | FL_ARM_DIV | FL_NOTM)
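/* The FL_FOR_ARCH* sets above are OR'ed with the per-core FLAGS when the
   all_cores table below is built (FLAGS | FL_FOR_ARCH##ARCH), so, for
   example, any v7-A core automatically inherits FL_THUMB2, FL_ARCH6K and
   the other bits implied by FL_FOR_ARCH7A.  */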
767 /* The bits in this mask specify which
768 instructions we are allowed to generate. */
769 static unsigned long insn_flags = 0;
771 /* The bits in this mask specify which instruction scheduling options should
772 be used. */
773 static unsigned long tune_flags = 0;
775 /* The highest ARM architecture version supported by the
776 target. */
777 enum base_architecture arm_base_arch = BASE_ARCH_0;
779 /* The following are used in the arm.md file as equivalents to bits
780 in the above two flag variables. */
782 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
783 int arm_arch3m = 0;
785 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
786 int arm_arch4 = 0;
788 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
789 int arm_arch4t = 0;
791 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
792 int arm_arch5 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
795 int arm_arch5e = 0;
797 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
798 int arm_arch6 = 0;
800 /* Nonzero if this chip supports the ARM 6K extensions. */
801 int arm_arch6k = 0;
803 /* Nonzero if instructions present in ARMv6-M can be used. */
804 int arm_arch6m = 0;
806 /* Nonzero if this chip supports the ARM 7 extensions. */
807 int arm_arch7 = 0;
809 /* Nonzero if instructions not present in the 'M' profile can be used. */
810 int arm_arch_notm = 0;
812 /* Nonzero if instructions present in ARMv7E-M can be used. */
813 int arm_arch7em = 0;
815 /* Nonzero if instructions present in ARMv8 can be used. */
816 int arm_arch8 = 0;
818 /* Nonzero if this chip can benefit from load scheduling. */
819 int arm_ld_sched = 0;
821 /* Nonzero if this chip is a StrongARM. */
822 int arm_tune_strongarm = 0;
824 /* Nonzero if this chip supports Intel Wireless MMX technology. */
825 int arm_arch_iwmmxt = 0;
827 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
828 int arm_arch_iwmmxt2 = 0;
830 /* Nonzero if this chip is an XScale. */
831 int arm_arch_xscale = 0;
833 /* Nonzero if tuning for XScale. */
834 int arm_tune_xscale = 0;
836 /* Nonzero if we want to tune for stores that access the write-buffer.
837 This typically means an ARM6 or ARM7 with MMU or MPU. */
838 int arm_tune_wbuf = 0;
840 /* Nonzero if tuning for Cortex-A9. */
841 int arm_tune_cortex_a9 = 0;
843 /* Nonzero if generating Thumb instructions. */
844 int thumb_code = 0;
846 /* Nonzero if generating Thumb-1 instructions. */
847 int thumb1_code = 0;
849 /* Nonzero if we should define __THUMB_INTERWORK__ in the
850 preprocessor.
851 XXX This is a bit of a hack, it's intended to help work around
852 problems in GLD which doesn't understand that armv5t code is
853 interworking clean. */
854 int arm_cpp_interwork = 0;
856 /* Nonzero if chip supports Thumb 2. */
857 int arm_arch_thumb2;
859 /* Nonzero if chip supports integer division instruction. */
860 int arm_arch_arm_hwdiv;
861 int arm_arch_thumb_hwdiv;
863 /* Nonzero if we should use Neon to handle 64-bit operations rather
864 than core registers. */
865 int prefer_neon_for_64bits = 0;
867 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
868 we must report the mode of the memory reference from
869 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
870 enum machine_mode output_memory_reference_mode;
872 /* The register number to be used for the PIC offset register. */
873 unsigned arm_pic_register = INVALID_REGNUM;
875 /* Set to 1 after arm_reorg has started. Reset to start at the start of
876 the next function. */
877 static int after_arm_reorg = 0;
879 enum arm_pcs arm_pcs_default;
881 /* For an explanation of these variables, see final_prescan_insn below. */
882 int arm_ccfsm_state;
883 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
884 enum arm_cond_code arm_current_cc;
886 rtx arm_target_insn;
887 int arm_target_label;
888 /* The number of conditionally executed insns, including the current insn. */
889 int arm_condexec_count = 0;
890 /* A bitmask specifying the patterns for the IT block.
891 Zero means do not output an IT block before this insn. */
892 int arm_condexec_mask = 0;
893 /* The number of bits used in arm_condexec_mask. */
894 int arm_condexec_masklen = 0;
896 /* The condition codes of the ARM, and the inverse function. */
897 static const char * const arm_condition_codes[] =
899 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
900 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
903 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
904 int arm_regs_in_sequence[] =
906 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
909 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
910 #define streq(string1, string2) (strcmp (string1, string2) == 0)
912 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
913 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
914 | (1 << PIC_OFFSET_TABLE_REGNUM)))
916 /* Initialization code. */
918 struct processors
920 const char *const name;
921 enum processor_type core;
922 const char *arch;
923 enum base_architecture base_arch;
924 const unsigned long flags;
925 const struct tune_params *const tune;
929 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
930 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
931 prefetch_slots, \
932 l1_size, \
933 l1_line_size
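/* Illustrative expansion: ARM_PREFETCH_BENEFICIAL (4, 32, 32), as used for
   Cortex-A9 below, fills the three prefetch-related tune_params fields with
   4 prefetch slots and (presumably in kbytes and bytes respectively) a 32K
   L1 cache with 32-byte lines.  */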
935 /* arm generic vectorizer costs. */
936 static const
937 struct cpu_vec_costs arm_default_vec_cost = {
938 1, /* scalar_stmt_cost. */
939 1, /* scalar load_cost. */
940 1, /* scalar_store_cost. */
941 1, /* vec_stmt_cost. */
942 1, /* vec_to_scalar_cost. */
943 1, /* scalar_to_vec_cost. */
944 1, /* vec_align_load_cost. */
945 1, /* vec_unalign_load_cost. */
946 1, /* vec_unalign_store_cost. */
947 1, /* vec_store_cost. */
948 3, /* cond_taken_branch_cost. */
949 1, /* cond_not_taken_branch_cost. */
953 const struct cpu_cost_table generic_extra_costs =
955 /* ALU */
957 0, /* Arith. */
958 0, /* Logical. */
959 0, /* Shift. */
960 COSTS_N_INSNS (1), /* Shift_reg. */
961 0, /* Arith_shift. */
962 COSTS_N_INSNS (1), /* Arith_shift_reg. */
963 0, /* Log_shift. */
964 COSTS_N_INSNS (1), /* Log_shift_reg. */
965 0, /* Extend. */
966 COSTS_N_INSNS (1), /* Extend_arith. */
967 0, /* Bfi. */
968 0, /* Bfx. */
969 0, /* Clz. */
970 COSTS_N_INSNS (1), /* non_exec. */
971 false /* non_exec_costs_exec. */
973 /* MULT SImode */
976 COSTS_N_INSNS (2), /* Simple. */
977 COSTS_N_INSNS (1), /* Flag_setting. */
978 COSTS_N_INSNS (2), /* Extend. */
979 COSTS_N_INSNS (3), /* Add. */
980 COSTS_N_INSNS (3), /* Extend_add. */
981 COSTS_N_INSNS (8) /* Idiv. */
983 /* MULT DImode */
985 0, /* Simple (N/A). */
986 0, /* Flag_setting (N/A). */
987 COSTS_N_INSNS (2), /* Extend. */
988 0, /* Add (N/A). */
989 COSTS_N_INSNS (3), /* Extend_add. */
990 0 /* Idiv (N/A). */
993 /* LD/ST */
995 COSTS_N_INSNS (2), /* Load. */
996 COSTS_N_INSNS (2), /* Load_sign_extend. */
997 COSTS_N_INSNS (3), /* Ldrd. */
998 COSTS_N_INSNS (2), /* Ldm_1st. */
999 1, /* Ldm_regs_per_insn_1st. */
1000 1, /* Ldm_regs_per_insn_subsequent. */
1001 COSTS_N_INSNS (2), /* Loadf. */
1002 COSTS_N_INSNS (3), /* Loadd. */
1003 COSTS_N_INSNS (1), /* Load_unaligned. */
1004 COSTS_N_INSNS (2), /* Store. */
1005 COSTS_N_INSNS (3), /* Strd. */
1006 COSTS_N_INSNS (2), /* Stm_1st. */
1007 1, /* Stm_regs_per_insn_1st. */
1008 1, /* Stm_regs_per_insn_subsequent. */
1009 COSTS_N_INSNS (2), /* Storef. */
1010 COSTS_N_INSNS (3), /* Stored. */
1011 COSTS_N_INSNS (1) /* Store_unaligned. */
1014 /* FP SFmode */
1016 COSTS_N_INSNS (7), /* Div. */
1017 COSTS_N_INSNS (2), /* Mult. */
1018 COSTS_N_INSNS (3), /* Mult_addsub. */
1019 COSTS_N_INSNS (3), /* Fma. */
1020 COSTS_N_INSNS (1), /* Addsub. */
1021 0, /* Fpconst. */
1022 0, /* Neg. */
1023 0, /* Compare. */
1024 0, /* Widen. */
1025 0, /* Narrow. */
1026 0, /* Toint. */
1027 0, /* Fromint. */
1028 0 /* Roundint. */
1030 /* FP DFmode */
1032 COSTS_N_INSNS (15), /* Div. */
1033 COSTS_N_INSNS (5), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (7), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 0, /* Fpconst. */
1038 0, /* Neg. */
1039 0, /* Compare. */
1040 0, /* Widen. */
1041 0, /* Narrow. */
1042 0, /* Toint. */
1043 0, /* Fromint. */
1044 0 /* Roundint. */
1047 /* Vector */
1049 COSTS_N_INSNS (1) /* Alu. */
1053 const struct cpu_cost_table cortexa15_extra_costs =
1055 /* ALU */
1057 COSTS_N_INSNS (1), /* Arith. */
1058 COSTS_N_INSNS (1), /* Logical. */
1059 COSTS_N_INSNS (1), /* Shift. */
1060 COSTS_N_INSNS (1), /* Shift_reg. */
1061 COSTS_N_INSNS (1), /* Arith_shift. */
1062 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1063 COSTS_N_INSNS (1), /* Log_shift. */
1064 COSTS_N_INSNS (1), /* Log_shift_reg. */
1065 COSTS_N_INSNS (1), /* Extend. */
1066 COSTS_N_INSNS (2), /* Extend_arith. */
1067 COSTS_N_INSNS (2), /* Bfi. */
1068 COSTS_N_INSNS (1), /* Bfx. */
1069 COSTS_N_INSNS (1), /* Clz. */
1070 COSTS_N_INSNS (1), /* non_exec. */
1071 true /* non_exec_costs_exec. */
1073 /* MULT SImode */
1076 COSTS_N_INSNS (3), /* Simple. */
1077 COSTS_N_INSNS (4), /* Flag_setting. */
1078 COSTS_N_INSNS (3), /* Extend. */
1079 COSTS_N_INSNS (4), /* Add. */
1080 COSTS_N_INSNS (4), /* Extend_add. */
1081 COSTS_N_INSNS (19) /* Idiv. */
1083 /* MULT DImode */
1085 0, /* Simple (N/A). */
1086 0, /* Flag_setting (N/A). */
1087 COSTS_N_INSNS (4), /* Extend. */
1088 0, /* Add (N/A). */
1089 COSTS_N_INSNS (6), /* Extend_add. */
1090 0 /* Idiv (N/A). */
1093 /* LD/ST */
1095 COSTS_N_INSNS (4), /* Load. */
1096 COSTS_N_INSNS (4), /* Load_sign_extend. */
1097 COSTS_N_INSNS (4), /* Ldrd. */
1098 COSTS_N_INSNS (5), /* Ldm_1st. */
1099 1, /* Ldm_regs_per_insn_1st. */
1100 2, /* Ldm_regs_per_insn_subsequent. */
1101 COSTS_N_INSNS (5), /* Loadf. */
1102 COSTS_N_INSNS (5), /* Loadd. */
1103 COSTS_N_INSNS (1), /* Load_unaligned. */
1104 COSTS_N_INSNS (1), /* Store. */
1105 COSTS_N_INSNS (1), /* Strd. */
1106 COSTS_N_INSNS (2), /* Stm_1st. */
1107 1, /* Stm_regs_per_insn_1st. */
1108 2, /* Stm_regs_per_insn_subsequent. */
1109 COSTS_N_INSNS (1), /* Storef. */
1110 COSTS_N_INSNS (1), /* Stored. */
1111 COSTS_N_INSNS (1) /* Store_unaligned. */
1114 /* FP SFmode */
1116 COSTS_N_INSNS (18), /* Div. */
1117 COSTS_N_INSNS (5), /* Mult. */
1118 COSTS_N_INSNS (3), /* Mult_addsub. */
1119 COSTS_N_INSNS (13), /* Fma. */
1120 COSTS_N_INSNS (5), /* Addsub. */
1121 COSTS_N_INSNS (5), /* Fpconst. */
1122 COSTS_N_INSNS (3), /* Neg. */
1123 COSTS_N_INSNS (3), /* Compare. */
1124 COSTS_N_INSNS (3), /* Widen. */
1125 COSTS_N_INSNS (3), /* Narrow. */
1126 COSTS_N_INSNS (3), /* Toint. */
1127 COSTS_N_INSNS (3), /* Fromint. */
1128 COSTS_N_INSNS (3) /* Roundint. */
1130 /* FP DFmode */
1132 COSTS_N_INSNS (32), /* Div. */
1133 COSTS_N_INSNS (5), /* Mult. */
1134 COSTS_N_INSNS (3), /* Mult_addsub. */
1135 COSTS_N_INSNS (13), /* Fma. */
1136 COSTS_N_INSNS (5), /* Addsub. */
1137 COSTS_N_INSNS (3), /* Fpconst. */
1138 COSTS_N_INSNS (3), /* Neg. */
1139 COSTS_N_INSNS (3), /* Compare. */
1140 COSTS_N_INSNS (3), /* Widen. */
1141 COSTS_N_INSNS (3), /* Narrow. */
1142 COSTS_N_INSNS (3), /* Toint. */
1143 COSTS_N_INSNS (3), /* Fromint. */
1144 COSTS_N_INSNS (3) /* Roundint. */
1147 /* Vector */
1149 COSTS_N_INSNS (1) /* Alu. */
1153 const struct tune_params arm_slowmul_tune =
1155 arm_slowmul_rtx_costs,
1156 NULL,
1157 NULL,
1158 3, /* Constant limit. */
1159 5, /* Max cond insns. */
1160 ARM_PREFETCH_NOT_BENEFICIAL,
1161 true, /* Prefer constant pool. */
1162 arm_default_branch_cost,
1163 false, /* Prefer LDRD/STRD. */
1164 {true, true}, /* Prefer non short circuit. */
1165 &arm_default_vec_cost, /* Vectorizer costs. */
1166 false /* Prefer Neon for 64-bits bitops. */
1169 const struct tune_params arm_fastmul_tune =
1171 arm_fastmul_rtx_costs,
1172 NULL,
1173 NULL,
1174 1, /* Constant limit. */
1175 5, /* Max cond insns. */
1176 ARM_PREFETCH_NOT_BENEFICIAL,
1177 true, /* Prefer constant pool. */
1178 arm_default_branch_cost,
1179 false, /* Prefer LDRD/STRD. */
1180 {true, true}, /* Prefer non short circuit. */
1181 &arm_default_vec_cost, /* Vectorizer costs. */
1182 false /* Prefer Neon for 64-bits bitops. */
1185 /* StrongARM has early execution of branches, so a sequence that is worth
1186 skipping is shorter. Set max_insns_skipped to a lower value. */
1188 const struct tune_params arm_strongarm_tune =
1190 arm_fastmul_rtx_costs,
1191 NULL,
1192 NULL,
1193 1, /* Constant limit. */
1194 3, /* Max cond insns. */
1195 ARM_PREFETCH_NOT_BENEFICIAL,
1196 true, /* Prefer constant pool. */
1197 arm_default_branch_cost,
1198 false, /* Prefer LDRD/STRD. */
1199 {true, true}, /* Prefer non short circuit. */
1200 &arm_default_vec_cost, /* Vectorizer costs. */
1201 false /* Prefer Neon for 64-bits bitops. */
1204 const struct tune_params arm_xscale_tune =
1206 arm_xscale_rtx_costs,
1207 NULL,
1208 xscale_sched_adjust_cost,
1209 2, /* Constant limit. */
1210 3, /* Max cond insns. */
1211 ARM_PREFETCH_NOT_BENEFICIAL,
1212 true, /* Prefer constant pool. */
1213 arm_default_branch_cost,
1214 false, /* Prefer LDRD/STRD. */
1215 {true, true}, /* Prefer non short circuit. */
1216 &arm_default_vec_cost, /* Vectorizer costs. */
1217 false /* Prefer Neon for 64-bits bitops. */
1220 const struct tune_params arm_9e_tune =
1222 arm_9e_rtx_costs,
1223 NULL,
1224 NULL,
1225 1, /* Constant limit. */
1226 5, /* Max cond insns. */
1227 ARM_PREFETCH_NOT_BENEFICIAL,
1228 true, /* Prefer constant pool. */
1229 arm_default_branch_cost,
1230 false, /* Prefer LDRD/STRD. */
1231 {true, true}, /* Prefer non short circuit. */
1232 &arm_default_vec_cost, /* Vectorizer costs. */
1233 false /* Prefer Neon for 64-bits bitops. */
1236 const struct tune_params arm_v6t2_tune =
1238 arm_9e_rtx_costs,
1239 NULL,
1240 NULL,
1241 1, /* Constant limit. */
1242 5, /* Max cond insns. */
1243 ARM_PREFETCH_NOT_BENEFICIAL,
1244 false, /* Prefer constant pool. */
1245 arm_default_branch_cost,
1246 false, /* Prefer LDRD/STRD. */
1247 {true, true}, /* Prefer non short circuit. */
1248 &arm_default_vec_cost, /* Vectorizer costs. */
1249 false /* Prefer Neon for 64-bits bitops. */
1252 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1253 const struct tune_params arm_cortex_tune =
1255 arm_9e_rtx_costs,
1256 &generic_extra_costs,
1257 NULL,
1258 1, /* Constant limit. */
1259 5, /* Max cond insns. */
1260 ARM_PREFETCH_NOT_BENEFICIAL,
1261 false, /* Prefer constant pool. */
1262 arm_default_branch_cost,
1263 false, /* Prefer LDRD/STRD. */
1264 {true, true}, /* Prefer non short circuit. */
1265 &arm_default_vec_cost, /* Vectorizer costs. */
1266 false /* Prefer Neon for 64-bits bitops. */
1269 const struct tune_params arm_cortex_a15_tune =
1271 arm_9e_rtx_costs,
1272 &cortexa15_extra_costs,
1273 NULL,
1274 1, /* Constant limit. */
1275 2, /* Max cond insns. */
1276 ARM_PREFETCH_NOT_BENEFICIAL,
1277 false, /* Prefer constant pool. */
1278 arm_default_branch_cost,
1279 true, /* Prefer LDRD/STRD. */
1280 {true, true}, /* Prefer non short circuit. */
1281 &arm_default_vec_cost, /* Vectorizer costs. */
1282 false /* Prefer Neon for 64-bits bitops. */
1285 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1286 less appealing. Set max_insns_skipped to a low value. */
1288 const struct tune_params arm_cortex_a5_tune =
1290 arm_9e_rtx_costs,
1291 NULL,
1292 NULL,
1293 1, /* Constant limit. */
1294 1, /* Max cond insns. */
1295 ARM_PREFETCH_NOT_BENEFICIAL,
1296 false, /* Prefer constant pool. */
1297 arm_cortex_a5_branch_cost,
1298 false, /* Prefer LDRD/STRD. */
1299 {false, false}, /* Prefer non short circuit. */
1300 &arm_default_vec_cost, /* Vectorizer costs. */
1301 false /* Prefer Neon for 64-bits bitops. */
1304 const struct tune_params arm_cortex_a9_tune =
1306 arm_9e_rtx_costs,
1307 NULL,
1308 cortex_a9_sched_adjust_cost,
1309 1, /* Constant limit. */
1310 5, /* Max cond insns. */
1311 ARM_PREFETCH_BENEFICIAL(4,32,32),
1312 false, /* Prefer constant pool. */
1313 arm_default_branch_cost,
1314 false, /* Prefer LDRD/STRD. */
1315 {true, true}, /* Prefer non short circuit. */
1316 &arm_default_vec_cost, /* Vectorizer costs. */
1317 false /* Prefer Neon for 64-bits bitops. */
1320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1322 const struct tune_params arm_v6m_tune =
1324 arm_9e_rtx_costs,
1325 NULL,
1326 NULL,
1327 1, /* Constant limit. */
1328 5, /* Max cond insns. */
1329 ARM_PREFETCH_NOT_BENEFICIAL,
1330 false, /* Prefer constant pool. */
1331 arm_default_branch_cost,
1332 false, /* Prefer LDRD/STRD. */
1333 {false, false}, /* Prefer non short circuit. */
1334 &arm_default_vec_cost, /* Vectorizer costs. */
1335 false /* Prefer Neon for 64-bits bitops. */
1338 const struct tune_params arm_fa726te_tune =
1340 arm_9e_rtx_costs,
1341 NULL,
1342 fa726te_sched_adjust_cost,
1343 1, /* Constant limit. */
1344 5, /* Max cond insns. */
1345 ARM_PREFETCH_NOT_BENEFICIAL,
1346 true, /* Prefer constant pool. */
1347 arm_default_branch_cost,
1348 false, /* Prefer LDRD/STRD. */
1349 {true, true}, /* Prefer non short circuit. */
1350 &arm_default_vec_cost, /* Vectorizer costs. */
1351 false /* Prefer Neon for 64-bits bitops. */
1355 /* Not all of these give usefully different compilation alternatives,
1356 but there is no simple way of generalizing them. */
1357 static const struct processors all_cores[] =
1359 /* ARM Cores */
1360 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1361 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1362 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1363 #include "arm-cores.def"
1364 #undef ARM_CORE
1365 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
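/* For illustration only (the real entries live in arm-cores.def): an entry
   such as ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, cortex_a15)
   would expand to
     {"cortex-a15", cortexa15, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a15_tune},
   tying the core name to its architecture flags and tuning table.  */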
1368 static const struct processors all_architectures[] =
1370 /* ARM Architectures */
1371 /* We don't specify tuning costs here as it will be figured out
1372 from the core. */
1374 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1375 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1376 #include "arm-arches.def"
1377 #undef ARM_ARCH
1378 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1382 /* These are populated as commandline arguments are processed, or NULL
1383 if not specified. */
1384 static const struct processors *arm_selected_arch;
1385 static const struct processors *arm_selected_cpu;
1386 static const struct processors *arm_selected_tune;
1388 /* The name of the preprocessor macro to define for this architecture. */
1390 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1392 /* Available values for -mfpu=. */
1394 static const struct arm_fpu_desc all_fpus[] =
1396 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1397 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1398 #include "arm-fpus.def"
1399 #undef ARM_FPU
1403 /* Supported TLS relocations. */
1405 enum tls_reloc {
1406 TLS_GD32,
1407 TLS_LDM32,
1408 TLS_LDO32,
1409 TLS_IE32,
1410 TLS_LE32,
1411 TLS_DESCSEQ /* GNU scheme */
1414 /* The maximum number of insns to be used when loading a constant. */
1415 inline static int
1416 arm_constant_limit (bool size_p)
1418 return size_p ? 1 : current_tune->constant_limit;
1421 /* Emit an insn that's a simple single-set. Both the operands must be known
1422 to be valid. */
1423 inline static rtx
1424 emit_set_insn (rtx x, rtx y)
1426 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1429 /* Return the number of bits set in VALUE. */
1430 static unsigned
1431 bit_count (unsigned long value)
1433 unsigned long count = 0;
1435 while (value)
1437 count++;
1438 value &= value - 1; /* Clear the least-significant set bit. */
1441 return count;
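/* This is the classic Kernighan bit count: "value &= value - 1" clears the
   lowest set bit, so the loop runs once per set bit rather than once per
   bit position.  */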
1444 typedef struct
1446 enum machine_mode mode;
1447 const char *name;
1448 } arm_fixed_mode_set;
1450 /* A small helper for setting fixed-point libfuncs. */
1452 static void
1453 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1454 const char *funcname, const char *modename,
1455 int num_suffix)
1457 char buffer[50];
1459 if (num_suffix == 0)
1460 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1461 else
1462 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1464 set_optab_libfunc (optable, mode, buffer);
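/* For example (argument values shown purely for illustration), a call such
   as arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition.  */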
1467 static void
1468 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1469 enum machine_mode from, const char *funcname,
1470 const char *toname, const char *fromname)
1472 char buffer[50];
1473 const char *maybe_suffix_2 = "";
1475 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1476 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1477 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1478 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1479 maybe_suffix_2 = "2";
1481 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1482 maybe_suffix_2);
1484 set_conv_libfunc (optable, to, from, buffer);
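/* Likewise for conversions (again with purely illustrative arguments):
   arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode, "fract", "hq", "qq")
   builds "__gnu_fractqqhq2"; the "2" suffix is added because source and
   destination are both signed fract modes, matching the fixed-bit.h naming
   scheme.  */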
1487 /* Set up library functions unique to ARM. */
1489 static void
1490 arm_init_libfuncs (void)
1492 /* For Linux, we have access to kernel support for atomic operations. */
1493 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1494 init_sync_libfuncs (2 * UNITS_PER_WORD);
1496 /* There are no special library functions unless we are using the
1497 ARM BPABI. */
1498 if (!TARGET_BPABI)
1499 return;
1501 /* The functions below are described in Section 4 of the "Run-Time
1502 ABI for the ARM architecture", Version 1.0. */
1504 /* Double-precision floating-point arithmetic. Table 2. */
1505 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1506 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1507 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1508 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1509 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1511 /* Double-precision comparisons. Table 3. */
1512 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1513 set_optab_libfunc (ne_optab, DFmode, NULL);
1514 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1515 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1516 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1517 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1518 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1520 /* Single-precision floating-point arithmetic. Table 4. */
1521 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1522 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1523 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1524 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1525 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1527 /* Single-precision comparisons. Table 5. */
1528 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1529 set_optab_libfunc (ne_optab, SFmode, NULL);
1530 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1531 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1532 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1533 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1534 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1536 /* Floating-point to integer conversions. Table 6. */
1537 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1538 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1539 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1540 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1541 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1542 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1543 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1544 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1546 /* Conversions between floating types. Table 7. */
1547 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1548 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1550 /* Integer to floating-point conversions. Table 8. */
1551 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1552 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1553 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1554 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1555 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1556 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1557 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1558 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1560 /* Long long. Table 9. */
1561 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1562 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1563 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1564 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1565 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1566 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1567 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1568 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1570 /* Integer (32/32->32) division. \S 4.3.1. */
1571 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1572 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1574 /* The divmod functions are designed so that they can be used for
1575 plain division, even though they return both the quotient and the
1576 remainder. The quotient is returned in the usual location (i.e.,
1577 r0 for SImode, {r0, r1} for DImode), just as would be expected
1578 for an ordinary division routine. Because the AAPCS calling
1579 conventions specify that all of { r0, r1, r2, r3 } are
1580 callee-saved registers, there is no need to tell the compiler
1581 explicitly that those registers are clobbered by these
1582 routines. */
1583 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1584 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1586 /* For SImode division the ABI provides div-without-mod routines,
1587 which are faster. */
1588 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1589 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
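/* Illustrative sketch, not part of the original source: with these
   registrations a plain C division

       int quotient (int a, int b) { return a / b; }

   becomes a call to __aeabi_idiv with A in r0 and B in r1 and the result
   in r0, whereas a '%' operation uses __aeabi_idivmod and takes the
   remainder from the second result register, as described in the divmod
   comment above.  */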
1591 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1592 divmod libcalls instead. */
1593 set_optab_libfunc (smod_optab, DImode, NULL);
1594 set_optab_libfunc (umod_optab, DImode, NULL);
1595 set_optab_libfunc (smod_optab, SImode, NULL);
1596 set_optab_libfunc (umod_optab, SImode, NULL);
1598 /* Half-precision float operations. The compiler handles all operations
1599 with NULL libfuncs by converting to SFmode. */
1600 switch (arm_fp16_format)
1602 case ARM_FP16_FORMAT_IEEE:
1603 case ARM_FP16_FORMAT_ALTERNATIVE:
1605 /* Conversions. */
1606 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1607 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1608 ? "__gnu_f2h_ieee"
1609 : "__gnu_f2h_alternative"));
1610 set_conv_libfunc (sext_optab, SFmode, HFmode,
1611 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1612 ? "__gnu_h2f_ieee"
1613 : "__gnu_h2f_alternative"));
1615 /* Arithmetic. */
1616 set_optab_libfunc (add_optab, HFmode, NULL);
1617 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1618 set_optab_libfunc (smul_optab, HFmode, NULL);
1619 set_optab_libfunc (neg_optab, HFmode, NULL);
1620 set_optab_libfunc (sub_optab, HFmode, NULL);
1622 /* Comparisons. */
1623 set_optab_libfunc (eq_optab, HFmode, NULL);
1624 set_optab_libfunc (ne_optab, HFmode, NULL);
1625 set_optab_libfunc (lt_optab, HFmode, NULL);
1626 set_optab_libfunc (le_optab, HFmode, NULL);
1627 set_optab_libfunc (ge_optab, HFmode, NULL);
1628 set_optab_libfunc (gt_optab, HFmode, NULL);
1629 set_optab_libfunc (unord_optab, HFmode, NULL);
1630 break;
1632 default:
1633 break;
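/* Illustrative sketch, not part of the original source: because the HFmode
   arithmetic and comparison optabs above are left NULL, a function such as

       float twice (float x)
       {
         __fp16 h = x;
         return h + h;
       }

   stores H as 16 bits via __gnu_f2h_ieee (or the _alternative variant) and
   performs the addition after widening each operand back to SFmode with
   __gnu_h2f_ieee.  */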
1636 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1638 const arm_fixed_mode_set fixed_arith_modes[] =
1640 { QQmode, "qq" },
1641 { UQQmode, "uqq" },
1642 { HQmode, "hq" },
1643 { UHQmode, "uhq" },
1644 { SQmode, "sq" },
1645 { USQmode, "usq" },
1646 { DQmode, "dq" },
1647 { UDQmode, "udq" },
1648 { TQmode, "tq" },
1649 { UTQmode, "utq" },
1650 { HAmode, "ha" },
1651 { UHAmode, "uha" },
1652 { SAmode, "sa" },
1653 { USAmode, "usa" },
1654 { DAmode, "da" },
1655 { UDAmode, "uda" },
1656 { TAmode, "ta" },
1657 { UTAmode, "uta" }
1659 const arm_fixed_mode_set fixed_conv_modes[] =
1661 { QQmode, "qq" },
1662 { UQQmode, "uqq" },
1663 { HQmode, "hq" },
1664 { UHQmode, "uhq" },
1665 { SQmode, "sq" },
1666 { USQmode, "usq" },
1667 { DQmode, "dq" },
1668 { UDQmode, "udq" },
1669 { TQmode, "tq" },
1670 { UTQmode, "utq" },
1671 { HAmode, "ha" },
1672 { UHAmode, "uha" },
1673 { SAmode, "sa" },
1674 { USAmode, "usa" },
1675 { DAmode, "da" },
1676 { UDAmode, "uda" },
1677 { TAmode, "ta" },
1678 { UTAmode, "uta" },
1679 { QImode, "qi" },
1680 { HImode, "hi" },
1681 { SImode, "si" },
1682 { DImode, "di" },
1683 { TImode, "ti" },
1684 { SFmode, "sf" },
1685 { DFmode, "df" }
1687 unsigned int i, j;
1689 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1691 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1692 "add", fixed_arith_modes[i].name, 3);
1693 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1694 "ssadd", fixed_arith_modes[i].name, 3);
1695 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1696 "usadd", fixed_arith_modes[i].name, 3);
1697 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1698 "sub", fixed_arith_modes[i].name, 3);
1699 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1700 "sssub", fixed_arith_modes[i].name, 3);
1701 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1702 "ussub", fixed_arith_modes[i].name, 3);
1703 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1704 "mul", fixed_arith_modes[i].name, 3);
1705 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1706 "ssmul", fixed_arith_modes[i].name, 3);
1707 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1708 "usmul", fixed_arith_modes[i].name, 3);
1709 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1710 "div", fixed_arith_modes[i].name, 3);
1711 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1712 "udiv", fixed_arith_modes[i].name, 3);
1713 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1714 "ssdiv", fixed_arith_modes[i].name, 3);
1715 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1716 "usdiv", fixed_arith_modes[i].name, 3);
1717 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1718 "neg", fixed_arith_modes[i].name, 2);
1719 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1720 "ssneg", fixed_arith_modes[i].name, 2);
1721 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1722 "usneg", fixed_arith_modes[i].name, 2);
1723 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1724 "ashl", fixed_arith_modes[i].name, 3);
1725 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1726 "ashr", fixed_arith_modes[i].name, 3);
1727 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1728 "lshr", fixed_arith_modes[i].name, 3);
1729 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1730 "ssashl", fixed_arith_modes[i].name, 3);
1731 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1732 "usashl", fixed_arith_modes[i].name, 3);
1733 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1734 "cmp", fixed_arith_modes[i].name, 2);
1737 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1738 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1740 if (i == j
1741 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1742 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1743 continue;
1745 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1746 fixed_conv_modes[j].mode, "fract",
1747 fixed_conv_modes[i].name,
1748 fixed_conv_modes[j].name);
1749 arm_set_fixed_conv_libfunc (satfract_optab,
1750 fixed_conv_modes[i].mode,
1751 fixed_conv_modes[j].mode, "satfract",
1752 fixed_conv_modes[i].name,
1753 fixed_conv_modes[j].name);
1754 arm_set_fixed_conv_libfunc (fractuns_optab,
1755 fixed_conv_modes[i].mode,
1756 fixed_conv_modes[j].mode, "fractuns",
1757 fixed_conv_modes[i].name,
1758 fixed_conv_modes[j].name);
1759 arm_set_fixed_conv_libfunc (satfractuns_optab,
1760 fixed_conv_modes[i].mode,
1761 fixed_conv_modes[j].mode, "satfractuns",
1762 fixed_conv_modes[i].name,
1763 fixed_conv_modes[j].name);
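/* Illustrative sketch, not part of the original source: the loops above give
   every fixed-point helper a __gnu_-prefixed name built from the operation
   string and the mode suffixes in the tables, so embedded-C code such as

       _Sat _Fract f (_Sat _Fract a, _Sat _Fract b) { return a + b; }

   is lowered to the saturating-add helper registered for its machine mode,
   and casts between fixed-point and integer or float modes go through the
   "fract"/"satfract" conversion helpers registered just above.  */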
1767 if (TARGET_AAPCS_BASED)
1768 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1771 /* On AAPCS systems, this is the "struct __va_list". */
1772 static GTY(()) tree va_list_type;
1774 /* Return the type to use as __builtin_va_list. */
1775 static tree
1776 arm_build_builtin_va_list (void)
1778 tree va_list_name;
1779 tree ap_field;
1781 if (!TARGET_AAPCS_BASED)
1782 return std_build_builtin_va_list ();
1784 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1785 defined as:
1787 struct __va_list
1789 void *__ap;
1792 The C Library ABI further reinforces this definition in \S
1793 4.1.
1795 We must follow this definition exactly. The structure tag
1796 name is visible in C++ mangled names, and thus forms a part
1797 of the ABI. The field name may be used by people who
1798 #include <stdarg.h>. */
1799 /* Create the type. */
1800 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1801 /* Give it the required name. */
1802 va_list_name = build_decl (BUILTINS_LOCATION,
1803 TYPE_DECL,
1804 get_identifier ("__va_list"),
1805 va_list_type);
1806 DECL_ARTIFICIAL (va_list_name) = 1;
1807 TYPE_NAME (va_list_type) = va_list_name;
1808 TYPE_STUB_DECL (va_list_type) = va_list_name;
1809 /* Create the __ap field. */
1810 ap_field = build_decl (BUILTINS_LOCATION,
1811 FIELD_DECL,
1812 get_identifier ("__ap"),
1813 ptr_type_node);
1814 DECL_ARTIFICIAL (ap_field) = 1;
1815 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1816 TYPE_FIELDS (va_list_type) = ap_field;
1817 /* Compute its layout. */
1818 layout_type (va_list_type);
1820 return va_list_type;
1823 /* Return an expression of type "void *" pointing to the next
1824 available argument in a variable-argument list. VALIST is the
1825 user-level va_list object, of type __builtin_va_list. */
1826 static tree
1827 arm_extract_valist_ptr (tree valist)
1829 if (TREE_TYPE (valist) == error_mark_node)
1830 return error_mark_node;
1832 /* On an AAPCS target, the pointer is stored within "struct
1833 __va_list". */
1834 if (TARGET_AAPCS_BASED)
1836 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1837 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1838 valist, ap_field, NULL_TREE);
1841 return valist;
1844 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1845 static void
1846 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1848 valist = arm_extract_valist_ptr (valist);
1849 std_expand_builtin_va_start (valist, nextarg);
1852 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1853 static tree
1854 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1855 gimple_seq *post_p)
1857 valist = arm_extract_valist_ptr (valist);
1858 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
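/* Illustrative sketch, not part of the original source: on an AAPCS target
   the va_list seen by user code is the single-field record built above
   (struct __va_list with one void *__ap member), so

       #include <stdarg.h>
       int sum (int n, ...)
       {
         va_list ap;
         int i, total = 0;
         va_start (ap, n);
         for (i = 0; i < n; i++)
           total += va_arg (ap, int);
         va_end (ap);
         return total;
       }

   has va_start and va_arg operate on the extracted __ap pointer via the
   two hooks above, which defer to the standard expanders once the field
   has been unwrapped.  */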
1861 /* Fix up any incompatible options that the user has specified. */
1862 static void
1863 arm_option_override (void)
1865 if (global_options_set.x_arm_arch_option)
1866 arm_selected_arch = &all_architectures[arm_arch_option];
1868 if (global_options_set.x_arm_cpu_option)
1869 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1871 if (global_options_set.x_arm_tune_option)
1872 arm_selected_tune = &all_cores[(int) arm_tune_option];
1874 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1875 SUBTARGET_OVERRIDE_OPTIONS;
1876 #endif
1878 if (arm_selected_arch)
1880 if (arm_selected_cpu)
1882 /* Check for conflict between mcpu and march. */
1883 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1885 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1886 arm_selected_cpu->name, arm_selected_arch->name);
1887 /* -march wins for code generation.
1888 -mcpu wins for default tuning. */
1889 if (!arm_selected_tune)
1890 arm_selected_tune = arm_selected_cpu;
1892 arm_selected_cpu = arm_selected_arch;
1894 else
1895 /* -mcpu wins. */
1896 arm_selected_arch = NULL;
1898 else
1899 /* Pick a CPU based on the architecture. */
1900 arm_selected_cpu = arm_selected_arch;
1903 /* If the user did not specify a processor, choose one for them. */
1904 if (!arm_selected_cpu)
1906 const struct processors * sel;
1907 unsigned int sought;
1909 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1910 if (!arm_selected_cpu->name)
1912 #ifdef SUBTARGET_CPU_DEFAULT
1913 /* Use the subtarget default CPU if none was specified by
1914 configure. */
1915 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1916 #endif
1917 /* Default to ARM6. */
1918 if (!arm_selected_cpu->name)
1919 arm_selected_cpu = &all_cores[arm6];
1922 sel = arm_selected_cpu;
1923 insn_flags = sel->flags;
1925 /* Now check to see if the user has specified some command line
1926 switch that requires certain abilities from the CPU. */
1927 sought = 0;
1929 if (TARGET_INTERWORK || TARGET_THUMB)
1931 sought |= (FL_THUMB | FL_MODE32);
1933 /* There are no ARM processors that support both APCS-26 and
1934 interworking. Therefore we force FL_MODE26 to be removed
1935 from insn_flags here (if it was set), so that the search
1936 below will always be able to find a compatible processor. */
1937 insn_flags &= ~FL_MODE26;
1940 if (sought != 0 && ((sought & insn_flags) != sought))
1942 /* Try to locate a CPU type that supports all of the abilities
1943 of the default CPU, plus the extra abilities requested by
1944 the user. */
1945 for (sel = all_cores; sel->name != NULL; sel++)
1946 if ((sel->flags & sought) == (sought | insn_flags))
1947 break;
1949 if (sel->name == NULL)
1951 unsigned current_bit_count = 0;
1952 const struct processors * best_fit = NULL;
1954 /* Ideally we would like to issue an error message here
1955 saying that it was not possible to find a CPU compatible
1956 with the default CPU, but which also supports the command
1957 line options specified by the programmer, and so they
1958 ought to use the -mcpu=<name> command line option to
1959 override the default CPU type.
1961 If we cannot find a cpu that has both the
1962 characteristics of the default cpu and the given
1963 command line options we scan the array again looking
1964 for a best match. */
1965 for (sel = all_cores; sel->name != NULL; sel++)
1966 if ((sel->flags & sought) == sought)
1968 unsigned count;
1970 count = bit_count (sel->flags & insn_flags);
1972 if (count >= current_bit_count)
1974 best_fit = sel;
1975 current_bit_count = count;
1979 gcc_assert (best_fit);
1980 sel = best_fit;
1983 arm_selected_cpu = sel;
1987 gcc_assert (arm_selected_cpu);
1988 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1989 if (!arm_selected_tune)
1990 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1992 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1993 insn_flags = arm_selected_cpu->flags;
1994 arm_base_arch = arm_selected_cpu->base_arch;
1996 arm_tune = arm_selected_tune->core;
1997 tune_flags = arm_selected_tune->flags;
1998 current_tune = arm_selected_tune->tune;
2000 /* Make sure that the processor choice does not conflict with any of the
2001 other command line choices. */
2002 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2003 error ("target CPU does not support ARM mode");
2005 /* BPABI targets use linker tricks to allow interworking on cores
2006 without thumb support. */
2007 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2009 warning (0, "target CPU does not support interworking" );
2010 target_flags &= ~MASK_INTERWORK;
2013 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2015 warning (0, "target CPU does not support THUMB instructions");
2016 target_flags &= ~MASK_THUMB;
2019 if (TARGET_APCS_FRAME && TARGET_THUMB)
2021 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2022 target_flags &= ~MASK_APCS_FRAME;
2025 /* Callee super interworking implies thumb interworking. Adding
2026 this to the flags here simplifies the logic elsewhere. */
2027 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2028 target_flags |= MASK_INTERWORK;
2030 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2031 from here where no function is being compiled currently. */
2032 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2033 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2035 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2036 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2038 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2040 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2041 target_flags |= MASK_APCS_FRAME;
2044 if (TARGET_POKE_FUNCTION_NAME)
2045 target_flags |= MASK_APCS_FRAME;
2047 if (TARGET_APCS_REENT && flag_pic)
2048 error ("-fpic and -mapcs-reent are incompatible");
2050 if (TARGET_APCS_REENT)
2051 warning (0, "APCS reentrant code not supported. Ignored");
2053 /* If this target is normally configured to use APCS frames, warn if they
2054 are turned off and debugging is turned on. */
2055 if (TARGET_ARM
2056 && write_symbols != NO_DEBUG
2057 && !TARGET_APCS_FRAME
2058 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2059 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2061 if (TARGET_APCS_FLOAT)
2062 warning (0, "passing floating point arguments in fp regs not yet supported");
2064 if (TARGET_LITTLE_WORDS)
2065 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2066 "will be removed in a future release");
2068 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2069 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2070 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2071 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2072 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2073 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2074 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2075 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2076 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2077 arm_arch6m = arm_arch6 && !arm_arch_notm;
2078 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2079 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2080 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2081 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2082 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2084 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2085 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2086 thumb_code = TARGET_ARM == 0;
2087 thumb1_code = TARGET_THUMB1 != 0;
2088 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2089 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2090 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2091 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2092 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2093 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2094 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2095 if (arm_restrict_it == 2)
2096 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2098 if (!TARGET_THUMB2)
2099 arm_restrict_it = 0;
2101 /* If we are not using the default (ARM mode) section anchor offset
2102 ranges, then set the correct ranges now. */
2103 if (TARGET_THUMB1)
2105 /* Thumb-1 LDR instructions cannot have negative offsets.
2106 Permissible positive offset ranges are 5-bit (for byte loads),
2107 6-bit (for halfword loads), or 7-bit (for word loads).
2108 Empirical results suggest a 7-bit anchor range gives the best
2109 overall code size. */
2110 targetm.min_anchor_offset = 0;
2111 targetm.max_anchor_offset = 127;
2113 else if (TARGET_THUMB2)
2115 /* The minimum is set such that the total size of the block
2116 for a particular anchor is 248 + 1 + 4095 bytes, which is
2117 divisible by eight, ensuring natural spacing of anchors. */
2118 targetm.min_anchor_offset = -248;
2119 targetm.max_anchor_offset = 4095;
2122 /* V5 code we generate is completely interworking capable, so we turn off
2123 TARGET_INTERWORK here to avoid many tests later on. */
2125 /* XXX However, we must pass the right pre-processor defines to CPP
2126 or GLD can get confused. This is a hack. */
2127 if (TARGET_INTERWORK)
2128 arm_cpp_interwork = 1;
2130 if (arm_arch5)
2131 target_flags &= ~MASK_INTERWORK;
2133 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2134 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2136 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2137 error ("iwmmxt abi requires an iwmmxt capable cpu");
2139 if (!global_options_set.x_arm_fpu_index)
2141 const char *target_fpu_name;
2142 bool ok;
2144 #ifdef FPUTYPE_DEFAULT
2145 target_fpu_name = FPUTYPE_DEFAULT;
2146 #else
2147 target_fpu_name = "vfp";
2148 #endif
2150 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2151 CL_TARGET);
2152 gcc_assert (ok);
2155 arm_fpu_desc = &all_fpus[arm_fpu_index];
2157 switch (arm_fpu_desc->model)
2159 case ARM_FP_MODEL_VFP:
2160 arm_fpu_attr = FPU_VFP;
2161 break;
2163 default:
2164 gcc_unreachable();
2167 if (TARGET_AAPCS_BASED)
2169 if (TARGET_CALLER_INTERWORKING)
2170 error ("AAPCS does not support -mcaller-super-interworking");
2171 else
2172 if (TARGET_CALLEE_INTERWORKING)
2173 error ("AAPCS does not support -mcallee-super-interworking");
2176 /* iWMMXt and NEON are incompatible. */
2177 if (TARGET_IWMMXT && TARGET_NEON)
2178 error ("iWMMXt and NEON are incompatible");
2180 /* iWMMXt unsupported under Thumb mode. */
2181 if (TARGET_THUMB && TARGET_IWMMXT)
2182 error ("iWMMXt unsupported under Thumb mode");
2184 /* __fp16 support currently assumes the core has ldrh. */
2185 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2186 sorry ("__fp16 and no ldrh");
2188 /* If soft-float is specified then don't use FPU. */
2189 if (TARGET_SOFT_FLOAT)
2190 arm_fpu_attr = FPU_NONE;
2192 if (TARGET_AAPCS_BASED)
2194 if (arm_abi == ARM_ABI_IWMMXT)
2195 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2196 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2197 && TARGET_HARD_FLOAT
2198 && TARGET_VFP)
2199 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2200 else
2201 arm_pcs_default = ARM_PCS_AAPCS;
2203 else
2205 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2206 sorry ("-mfloat-abi=hard and VFP");
2208 if (arm_abi == ARM_ABI_APCS)
2209 arm_pcs_default = ARM_PCS_APCS;
2210 else
2211 arm_pcs_default = ARM_PCS_ATPCS;
2214 /* For arm2/3 there is no need to do any scheduling if we are doing
2215 software floating-point. */
2216 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2217 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2219 /* Use the cp15 method if it is available. */
2220 if (target_thread_pointer == TP_AUTO)
2222 if (arm_arch6k && !TARGET_THUMB1)
2223 target_thread_pointer = TP_CP15;
2224 else
2225 target_thread_pointer = TP_SOFT;
2228 if (TARGET_HARD_TP && TARGET_THUMB1)
2229 error ("can not use -mtp=cp15 with 16-bit Thumb");
2231 /* Override the default structure alignment for AAPCS ABI. */
2232 if (!global_options_set.x_arm_structure_size_boundary)
2234 if (TARGET_AAPCS_BASED)
2235 arm_structure_size_boundary = 8;
2237 else
2239 if (arm_structure_size_boundary != 8
2240 && arm_structure_size_boundary != 32
2241 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2243 if (ARM_DOUBLEWORD_ALIGN)
2244 warning (0,
2245 "structure size boundary can only be set to 8, 32 or 64");
2246 else
2247 warning (0, "structure size boundary can only be set to 8 or 32");
2248 arm_structure_size_boundary
2249 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2253 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2255 error ("RTP PIC is incompatible with Thumb");
2256 flag_pic = 0;
2259 /* If stack checking is disabled, we can use r10 as the PIC register,
2260 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2261 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2263 if (TARGET_VXWORKS_RTP)
2264 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2265 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2268 if (flag_pic && TARGET_VXWORKS_RTP)
2269 arm_pic_register = 9;
2271 if (arm_pic_register_string != NULL)
2273 int pic_register = decode_reg_name (arm_pic_register_string);
2275 if (!flag_pic)
2276 warning (0, "-mpic-register= is useless without -fpic");
2278 /* Prevent the user from choosing an obviously stupid PIC register. */
2279 else if (pic_register < 0 || call_used_regs[pic_register]
2280 || pic_register == HARD_FRAME_POINTER_REGNUM
2281 || pic_register == STACK_POINTER_REGNUM
2282 || pic_register >= PC_REGNUM
2283 || (TARGET_VXWORKS_RTP
2284 && (unsigned int) pic_register != arm_pic_register))
2285 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2286 else
2287 arm_pic_register = pic_register;
2290 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2291 if (fix_cm3_ldrd == 2)
2293 if (arm_selected_cpu->core == cortexm3)
2294 fix_cm3_ldrd = 1;
2295 else
2296 fix_cm3_ldrd = 0;
2299 /* Enable -munaligned-access by default for
2300 - all ARMv6 architecture-based processors
2301 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2302 - ARMv8 architecture-based processors.
2304 Disable -munaligned-access by default for
2305 - all pre-ARMv6 architecture-based processors
2306 - ARMv6-M architecture-based processors. */
2308 if (unaligned_access == 2)
2310 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2311 unaligned_access = 1;
2312 else
2313 unaligned_access = 0;
2315 else if (unaligned_access == 1
2316 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2318 warning (0, "target CPU does not support unaligned accesses");
2319 unaligned_access = 0;
2322 if (TARGET_THUMB1 && flag_schedule_insns)
2324 /* Don't warn since it's on by default in -O2. */
2325 flag_schedule_insns = 0;
2328 if (optimize_size)
2330 /* If optimizing for size, bump the number of instructions that we
2331 are prepared to conditionally execute (even on a StrongARM). */
2332 max_insns_skipped = 6;
2334 else
2335 max_insns_skipped = current_tune->max_insns_skipped;
2337 /* Hot/Cold partitioning is not currently supported, since we can't
2338 handle literal pool placement in that case. */
2339 if (flag_reorder_blocks_and_partition)
2341 inform (input_location,
2342 "-freorder-blocks-and-partition not supported on this architecture");
2343 flag_reorder_blocks_and_partition = 0;
2344 flag_reorder_blocks = 1;
2347 if (flag_pic)
2348 /* Hoisting PIC address calculations more aggressively provides a small,
2349 but measurable, size reduction for PIC code. Therefore, we decrease
2350 the bar for unrestricted expression hoisting to the cost of PIC address
2351 calculation, which is 2 instructions. */
2352 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2353 global_options.x_param_values,
2354 global_options_set.x_param_values);
2356 /* ARM EABI defaults to strict volatile bitfields. */
2357 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2358 && abi_version_at_least(2))
2359 flag_strict_volatile_bitfields = 1;
2361 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where
2362 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2363 if (flag_prefetch_loop_arrays < 0
2364 && HAVE_prefetch
2365 && optimize >= 3
2366 && current_tune->num_prefetch_slots > 0)
2367 flag_prefetch_loop_arrays = 1;
2369 /* Set up parameters to be used in prefetching algorithm. Do not override the
2370 defaults unless we are tuning for a core we have researched values for. */
2371 if (current_tune->num_prefetch_slots > 0)
2372 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2373 current_tune->num_prefetch_slots,
2374 global_options.x_param_values,
2375 global_options_set.x_param_values);
2376 if (current_tune->l1_cache_line_size >= 0)
2377 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2378 current_tune->l1_cache_line_size,
2379 global_options.x_param_values,
2380 global_options_set.x_param_values);
2381 if (current_tune->l1_cache_size >= 0)
2382 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2383 current_tune->l1_cache_size,
2384 global_options.x_param_values,
2385 global_options_set.x_param_values);
2387 /* Use Neon rather than core registers to perform 64-bit
2388 operations. */
2389 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2390 if (use_neon_for_64bits == 1)
2391 prefer_neon_for_64bits = true;
2393 /* Use the alternative scheduling-pressure algorithm by default. */
2394 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2395 global_options.x_param_values,
2396 global_options_set.x_param_values);
2398 /* Disable shrink-wrap when optimizing function for size, since it tends to
2399 generate additional returns. */
2400 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2401 flag_shrink_wrap = false;
2402 /* TBD: Dwarf info for apcs frame is not handled yet. */
2403 if (TARGET_APCS_FRAME)
2404 flag_shrink_wrap = false;
2406 /* Register global variables with the garbage collector. */
2407 arm_add_gc_roots ();
2410 static void
2411 arm_add_gc_roots (void)
2413 gcc_obstack_init(&minipool_obstack);
2414 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2417 /* A table of known ARM exception types.
2418 For use with the interrupt function attribute. */
2420 typedef struct
2422 const char *const arg;
2423 const unsigned long return_value;
2425 isr_attribute_arg;
2427 static const isr_attribute_arg isr_attribute_args [] =
2429 { "IRQ", ARM_FT_ISR },
2430 { "irq", ARM_FT_ISR },
2431 { "FIQ", ARM_FT_FIQ },
2432 { "fiq", ARM_FT_FIQ },
2433 { "ABORT", ARM_FT_ISR },
2434 { "abort", ARM_FT_ISR },
2435 { "ABORT", ARM_FT_ISR },
2436 { "abort", ARM_FT_ISR },
2437 { "UNDEF", ARM_FT_EXCEPTION },
2438 { "undef", ARM_FT_EXCEPTION },
2439 { "SWI", ARM_FT_EXCEPTION },
2440 { "swi", ARM_FT_EXCEPTION },
2441 { NULL, ARM_FT_NORMAL }
2444 /* Returns the (interrupt) function type of the current
2445 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2447 static unsigned long
2448 arm_isr_value (tree argument)
2450 const isr_attribute_arg * ptr;
2451 const char * arg;
2453 if (!arm_arch_notm)
2454 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2456 /* No argument - default to IRQ. */
2457 if (argument == NULL_TREE)
2458 return ARM_FT_ISR;
2460 /* Get the value of the argument. */
2461 if (TREE_VALUE (argument) == NULL_TREE
2462 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2463 return ARM_FT_UNKNOWN;
2465 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2467 /* Check it against the list of known arguments. */
2468 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2469 if (streq (arg, ptr->arg))
2470 return ptr->return_value;
2472 /* An unrecognized interrupt type. */
2473 return ARM_FT_UNKNOWN;
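/* Illustrative sketch, not part of the original source: the strings in
   isr_attribute_args are the arguments accepted by the interrupt/isr
   function attribute, e.g.

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   which arm_isr_value maps to ARM_FT_ISR; an attribute with no argument
   defaults to ARM_FT_ISR as well, and an unknown string yields
   ARM_FT_UNKNOWN.  */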
2476 /* Computes the type of the current function. */
2478 static unsigned long
2479 arm_compute_func_type (void)
2481 unsigned long type = ARM_FT_UNKNOWN;
2482 tree a;
2483 tree attr;
2485 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2487 /* Decide if the current function is volatile. Such functions
2488 never return, and many memory cycles can be saved by not storing
2489 register values that will never be needed again. This optimization
2490 was added to speed up context switching in a kernel application. */
2491 if (optimize > 0
2492 && (TREE_NOTHROW (current_function_decl)
2493 || !(flag_unwind_tables
2494 || (flag_exceptions
2495 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2496 && TREE_THIS_VOLATILE (current_function_decl))
2497 type |= ARM_FT_VOLATILE;
2499 if (cfun->static_chain_decl != NULL)
2500 type |= ARM_FT_NESTED;
2502 attr = DECL_ATTRIBUTES (current_function_decl);
2504 a = lookup_attribute ("naked", attr);
2505 if (a != NULL_TREE)
2506 type |= ARM_FT_NAKED;
2508 a = lookup_attribute ("isr", attr);
2509 if (a == NULL_TREE)
2510 a = lookup_attribute ("interrupt", attr);
2512 if (a == NULL_TREE)
2513 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2514 else
2515 type |= arm_isr_value (TREE_VALUE (a));
2517 return type;
2520 /* Returns the type of the current function. */
2522 unsigned long
2523 arm_current_func_type (void)
2525 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2526 cfun->machine->func_type = arm_compute_func_type ();
2528 return cfun->machine->func_type;
2531 bool
2532 arm_allocate_stack_slots_for_args (void)
2534 /* Naked functions should not allocate stack slots for arguments. */
2535 return !IS_NAKED (arm_current_func_type ());
2538 static bool
2539 arm_warn_func_return (tree decl)
2541 /* Naked functions are implemented entirely in assembly, including the
2542 return sequence, so suppress warnings about this. */
2543 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
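/* Illustrative sketch, not part of the original source: a naked function
   provides its entire body, including the return sequence, in inline
   assembly, e.g.

       void __attribute__ ((naked)) springboard (void)
       {
         __asm__ volatile ("bx\tlr");
       }

   which is why no argument stack slots are allocated for it and why the
   missing-return warning is suppressed above.  */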
2547 /* Output assembler code for a block containing the constant parts
2548 of a trampoline, leaving space for the variable parts.
2550 On the ARM, (if r8 is the static chain regnum, and remembering that
2551 referencing pc adds an offset of 8) the trampoline looks like:
2552 ldr r8, [pc, #0]
2553 ldr pc, [pc]
2554 .word static chain value
2555 .word function's address
2556 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2558 static void
2559 arm_asm_trampoline_template (FILE *f)
2561 if (TARGET_ARM)
2563 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2564 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2566 else if (TARGET_THUMB2)
2568 /* The Thumb-2 trampoline is similar to the arm implementation.
2569 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2570 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2571 STATIC_CHAIN_REGNUM, PC_REGNUM);
2572 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2574 else
2576 ASM_OUTPUT_ALIGN (f, 2);
2577 fprintf (f, "\t.code\t16\n");
2578 fprintf (f, ".Ltrampoline_start:\n");
2579 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2580 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2581 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2582 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2583 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2584 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2586 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2587 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2590 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2592 static void
2593 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2595 rtx fnaddr, mem, a_tramp;
2597 emit_block_move (m_tramp, assemble_trampoline_template (),
2598 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2600 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2601 emit_move_insn (mem, chain_value);
2603 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2604 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2605 emit_move_insn (mem, fnaddr);
2607 a_tramp = XEXP (m_tramp, 0);
2608 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2609 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2610 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2613 /* Thumb trampolines should be entered in thumb mode, so set
2614 the bottom bit of the address. */
2616 static rtx
2617 arm_trampoline_adjust_address (rtx addr)
2619 if (TARGET_THUMB)
2620 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2621 NULL, 0, OPTAB_LIB_WIDEN);
2622 return addr;
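/* Illustrative sketch, not part of the original source: a trampoline is
   emitted when the address of a GNU C nested function escapes, e.g.

       extern int apply (int (*fn) (int));      (a hypothetical callback API)

       int outer (int x)
       {
         int inner (int y) { return x + y; }
         return apply (inner);
       }

   The constant template above is copied into the trampoline slot,
   arm_trampoline_init stores the static chain value and inner's real
   address into its variable words, __clear_cache is run over the block,
   and on Thumb targets arm_trampoline_adjust_address sets the low bit of
   the resulting pointer so the stub is entered in Thumb state.  */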
2625 /* Return 1 if it is possible to return using a single instruction.
2626 If SIBLING is non-null, this is a test for a return before a sibling
2627 call. SIBLING is the call insn, so we can examine its register usage. */
2630 use_return_insn (int iscond, rtx sibling)
2632 int regno;
2633 unsigned int func_type;
2634 unsigned long saved_int_regs;
2635 unsigned HOST_WIDE_INT stack_adjust;
2636 arm_stack_offsets *offsets;
2638 /* Never use a return instruction before reload has run. */
2639 if (!reload_completed)
2640 return 0;
2642 func_type = arm_current_func_type ();
2644 /* Naked, volatile and stack alignment functions need special
2645 consideration. */
2646 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2647 return 0;
2649 /* So do interrupt functions that use the frame pointer and Thumb
2650 interrupt functions. */
2651 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2652 return 0;
2654 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2655 && !optimize_function_for_size_p (cfun))
2656 return 0;
2658 offsets = arm_get_frame_offsets ();
2659 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2661 /* As do variadic functions. */
2662 if (crtl->args.pretend_args_size
2663 || cfun->machine->uses_anonymous_args
2664 /* Or if the function calls __builtin_eh_return () */
2665 || crtl->calls_eh_return
2666 /* Or if the function calls alloca */
2667 || cfun->calls_alloca
2668 /* Or if there is a stack adjustment. However, if the stack pointer
2669 is saved on the stack, we can use a pre-incrementing stack load. */
2670 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2671 && stack_adjust == 4)))
2672 return 0;
2674 saved_int_regs = offsets->saved_regs_mask;
2676 /* Unfortunately, the insn
2678 ldmib sp, {..., sp, ...}
2680 triggers a bug on most SA-110 based devices, such that the stack
2681 pointer won't be correctly restored if the instruction takes a
2682 page fault. We work around this problem by popping r3 along with
2683 the other registers, since that is never slower than executing
2684 another instruction.
2686 We test for !arm_arch5 here, because code for any architecture
2687 less than this could potentially be run on one of the buggy
2688 chips. */
2689 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2691 /* Validate that r3 is a call-clobbered register (always true in
2692 the default abi) ... */
2693 if (!call_used_regs[3])
2694 return 0;
2696 /* ... that it isn't being used for a return value ... */
2697 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2698 return 0;
2700 /* ... or for a tail-call argument ... */
2701 if (sibling)
2703 gcc_assert (CALL_P (sibling));
2705 if (find_regno_fusage (sibling, USE, 3))
2706 return 0;
2709 /* ... and that there are no call-saved registers in r0-r2
2710 (always true in the default ABI). */
2711 if (saved_int_regs & 0x7)
2712 return 0;
2715 /* Can't be done if interworking with Thumb, and any registers have been
2716 stacked. */
2717 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2718 return 0;
2720 /* On StrongARM, conditional returns are expensive if they aren't
2721 taken and multiple registers have been stacked. */
2722 if (iscond && arm_tune_strongarm)
2724 /* Conditional return when just the LR is stored is a simple
2725 conditional-load instruction, that's not expensive. */
2726 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2727 return 0;
2729 if (flag_pic
2730 && arm_pic_register != INVALID_REGNUM
2731 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2732 return 0;
2735 /* If there are saved registers but the LR isn't saved, then we need
2736 two instructions for the return. */
2737 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2738 return 0;
2740 /* Can't be done if any of the VFP regs are pushed,
2741 since this also requires an insn. */
2742 if (TARGET_HARD_FLOAT && TARGET_VFP)
2743 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2744 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2745 return 0;
2747 if (TARGET_REALLY_IWMMXT)
2748 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2750 return 0;
2752 return 1;
2755 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2756 shrink-wrapping if possible. This is the case if we need to emit a
2757 prologue, which we can test by looking at the offsets. */
2758 bool
2759 use_simple_return_p (void)
2761 arm_stack_offsets *offsets;
2763 offsets = arm_get_frame_offsets ();
2764 return offsets->outgoing_args != 0;
2767 /* Return TRUE if int I is a valid immediate ARM constant. */
2770 const_ok_for_arm (HOST_WIDE_INT i)
2772 int lowbit;
2774 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2775 be all zero, or all one. */
2776 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2777 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2778 != ((~(unsigned HOST_WIDE_INT) 0)
2779 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2780 return FALSE;
2782 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2784 /* Fast return for 0 and small values. We must do this for zero, since
2785 the code below can't handle that one case. */
2786 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2787 return TRUE;
2789 /* Get the number of trailing zeros. */
2790 lowbit = ffs((int) i) - 1;
2792 /* Only even shifts are allowed in ARM mode so round down to the
2793 nearest even number. */
2794 if (TARGET_ARM)
2795 lowbit &= ~1;
2797 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2798 return TRUE;
2800 if (TARGET_ARM)
2802 /* Allow rotated constants in ARM mode. */
2803 if (lowbit <= 4
2804 && ((i & ~0xc000003f) == 0
2805 || (i & ~0xf000000f) == 0
2806 || (i & ~0xfc000003) == 0))
2807 return TRUE;
2809 else
2811 HOST_WIDE_INT v;
2813 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2814 v = i & 0xff;
2815 v |= v << 16;
2816 if (i == v || i == (v | (v << 8)))
2817 return TRUE;
2819 /* Allow repeated pattern 0xXY00XY00. */
2820 v = i & 0xff00;
2821 v |= v << 16;
2822 if (i == v)
2823 return TRUE;
2826 return FALSE;
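/* Illustrative examples, not part of the original source: under the rules
   above, 0xff, 0xff00 and 0xf000000f are valid ARM-mode immediates (an
   8-bit value rotated right by an even amount), while 0x1fe is not because
   it would need an odd rotation; Thumb-2 additionally accepts 0x1fe, as
   well as replicated patterns such as 0x00ff00ff and 0x12121212.  */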
2829 /* Return true if I is a valid constant for the operation CODE. */
2831 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2833 if (const_ok_for_arm (i))
2834 return 1;
2836 switch (code)
2838 case SET:
2839 /* See if we can use movw. */
2840 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2841 return 1;
2842 else
2843 /* Otherwise, try mvn. */
2844 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2846 case PLUS:
2847 /* See if we can use addw or subw. */
2848 if (TARGET_THUMB2
2849 && ((i & 0xfffff000) == 0
2850 || ((-i) & 0xfffff000) == 0))
2851 return 1;
2852 /* else fall through. */
2854 case COMPARE:
2855 case EQ:
2856 case NE:
2857 case GT:
2858 case LE:
2859 case LT:
2860 case GE:
2861 case GEU:
2862 case LTU:
2863 case GTU:
2864 case LEU:
2865 case UNORDERED:
2866 case ORDERED:
2867 case UNEQ:
2868 case UNGE:
2869 case UNLT:
2870 case UNGT:
2871 case UNLE:
2872 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2874 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2875 case XOR:
2876 return 0;
2878 case IOR:
2879 if (TARGET_THUMB2)
2880 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2881 return 0;
2883 case AND:
2884 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2886 default:
2887 gcc_unreachable ();
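/* Illustrative sketch, not part of the original source: for AND a constant
   is also acceptable when its bitwise complement is a valid immediate,
   because the insn can then be emitted as BIC; e.g. (x & 0xffffff00) can
   use BIC with the immediate 0xff.  Similarly a SET of a constant whose
   complement is valid can use MVN, and a PLUS of a small negative value
   can become a SUB of its negation.  */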
2891 /* Return true if I is a valid di mode constant for the operation CODE. */
2893 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2895 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2896 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2897 rtx hi = GEN_INT (hi_val);
2898 rtx lo = GEN_INT (lo_val);
2900 if (TARGET_THUMB1)
2901 return 0;
2903 switch (code)
2905 case AND:
2906 case IOR:
2907 case XOR:
2908 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
2909 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
2910 case PLUS:
2911 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2913 default:
2914 return 0;
2918 /* Emit a sequence of insns to handle a large constant.
2919 CODE is the code of the operation required, it can be any of SET, PLUS,
2920 IOR, AND, XOR, MINUS;
2921 MODE is the mode in which the operation is being performed;
2922 VAL is the integer to operate on;
2923 SOURCE is the other operand (a register, or a null-pointer for SET);
2924 SUBTARGETS means it is safe to create scratch registers if that will
2925 either produce a simpler sequence, or we will want to cse the values.
2926 Return value is the number of insns emitted. */
2928 /* ??? Tweak this for thumb2. */
2930 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2931 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2933 rtx cond;
2935 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2936 cond = COND_EXEC_TEST (PATTERN (insn));
2937 else
2938 cond = NULL_RTX;
2940 if (subtargets || code == SET
2941 || (REG_P (target) && REG_P (source)
2942 && REGNO (target) != REGNO (source)))
2944 /* After arm_reorg has been called, we can't fix up expensive
2945 constants by pushing them into memory so we must synthesize
2946 them in-line, regardless of the cost. This is only likely to
2947 be more costly on chips that have load delay slots and we are
2948 compiling without running the scheduler (so no splitting
2949 occurred before the final instruction emission).
2951 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2953 if (!after_arm_reorg
2954 && !cond
2955 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2956 1, 0)
2957 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2958 + (code != SET))))
2960 if (code == SET)
2962 /* Currently SET is the only monadic value for CODE, all
2963 the rest are dyadic. */
2964 if (TARGET_USE_MOVT)
2965 arm_emit_movpair (target, GEN_INT (val));
2966 else
2967 emit_set_insn (target, GEN_INT (val));
2969 return 1;
2971 else
2973 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2975 if (TARGET_USE_MOVT)
2976 arm_emit_movpair (temp, GEN_INT (val));
2977 else
2978 emit_set_insn (temp, GEN_INT (val));
2980 /* For MINUS, the value is subtracted from, since we never
2981 have subtraction of a constant. */
2982 if (code == MINUS)
2983 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2984 else
2985 emit_set_insn (target,
2986 gen_rtx_fmt_ee (code, mode, source, temp));
2987 return 2;
2992 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2996 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2997 ARM/THUMB2 immediates and add up to VAL.
2998 The function return value gives the number of insns required. */
2999 static int
3000 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3001 struct four_ints *return_sequence)
3003 int best_consecutive_zeros = 0;
3004 int i;
3005 int best_start = 0;
3006 int insns1, insns2;
3007 struct four_ints tmp_sequence;
3009 /* If we aren't targeting ARM, the best place to start is always at
3010 the bottom, otherwise look more closely. */
3011 if (TARGET_ARM)
3013 for (i = 0; i < 32; i += 2)
3015 int consecutive_zeros = 0;
3017 if (!(val & (3 << i)))
3019 while ((i < 32) && !(val & (3 << i)))
3021 consecutive_zeros += 2;
3022 i += 2;
3024 if (consecutive_zeros > best_consecutive_zeros)
3026 best_consecutive_zeros = consecutive_zeros;
3027 best_start = i - consecutive_zeros;
3029 i -= 2;
3034 /* So long as it won't require any more insns to do so, it's
3035 desirable to emit a small constant (in bits 0...9) in the last
3036 insn. This way there is more chance that it can be combined with
3037 a later addressing insn to form a pre-indexed load or store
3038 operation. Consider:
3040 *((volatile int *)0xe0000100) = 1;
3041 *((volatile int *)0xe0000110) = 2;
3043 We want this to wind up as:
3045 mov rA, #0xe0000000
3046 mov rB, #1
3047 str rB, [rA, #0x100]
3048 mov rB, #2
3049 str rB, [rA, #0x110]
3051 rather than having to synthesize both large constants from scratch.
3053 Therefore, we calculate how many insns would be required to emit
3054 the constant starting from `best_start', and also starting from
3055 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3056 yield a shorter sequence, we may as well use zero. */
3057 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3058 if (best_start != 0
3059 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3061 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3062 if (insns2 <= insns1)
3064 *return_sequence = tmp_sequence;
3065 insns1 = insns2;
3069 return insns1;
3072 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3073 static int
3074 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3075 struct four_ints *return_sequence, int i)
3077 int remainder = val & 0xffffffff;
3078 int insns = 0;
3080 /* Try and find a way of doing the job in either two or three
3081 instructions.
3083 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3084 location. We start at position I. This may be the MSB, or
3085 optimal_immediate_sequence may have positioned it at the largest block
3086 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3087 wrapping around to the top of the word when we drop off the bottom.
3088 In the worst case this code should produce no more than four insns.
3090 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3091 constants, shifted to any arbitrary location. We should always start
3092 at the MSB. */
3095 int end;
3096 unsigned int b1, b2, b3, b4;
3097 unsigned HOST_WIDE_INT result;
3098 int loc;
3100 gcc_assert (insns < 4);
3102 if (i <= 0)
3103 i += 32;
3105 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3106 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3108 loc = i;
3109 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3110 /* We can use addw/subw for the last 12 bits. */
3111 result = remainder;
3112 else
3114 /* Use an 8-bit shifted/rotated immediate. */
3115 end = i - 8;
3116 if (end < 0)
3117 end += 32;
3118 result = remainder & ((0x0ff << end)
3119 | ((i < end) ? (0xff >> (32 - end))
3120 : 0));
3121 i -= 8;
3124 else
3126 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3127 arbitrary shifts. */
3128 i -= TARGET_ARM ? 2 : 1;
3129 continue;
3132 /* Next, see if we can do a better job with a thumb2 replicated
3133 constant.
3135 We do it this way around to catch the cases like 0x01F001E0 where
3136 two 8-bit immediates would work, but a replicated constant would
3137 make it worse.
3139 TODO: 16-bit constants that don't clear all the bits, but still win.
3140 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3141 if (TARGET_THUMB2)
3143 b1 = (remainder & 0xff000000) >> 24;
3144 b2 = (remainder & 0x00ff0000) >> 16;
3145 b3 = (remainder & 0x0000ff00) >> 8;
3146 b4 = remainder & 0xff;
3148 if (loc > 24)
3150 /* The 8-bit immediate already found clears b1 (and maybe b2),
3151 but must leave b3 and b4 alone. */
3153 /* First try to find a 32-bit replicated constant that clears
3154 almost everything. We can assume that we can't do it in one,
3155 or else we wouldn't be here. */
3156 unsigned int tmp = b1 & b2 & b3 & b4;
3157 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3158 + (tmp << 24);
3159 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3160 + (tmp == b3) + (tmp == b4);
3161 if (tmp
3162 && (matching_bytes >= 3
3163 || (matching_bytes == 2
3164 && const_ok_for_op (remainder & ~tmp2, code))))
3166 /* At least 3 of the bytes match, and the fourth has at
3167 least as many bits set, or two of the bytes match
3168 and it will only require one more insn to finish. */
3169 result = tmp2;
3170 i = tmp != b1 ? 32
3171 : tmp != b2 ? 24
3172 : tmp != b3 ? 16
3173 : 8;
3176 /* Second, try to find a 16-bit replicated constant that can
3177 leave three of the bytes clear. If b2 or b4 is already
3178 zero, then we can. If the 8-bit from above would not
3179 clear b2 anyway, then we still win. */
3180 else if (b1 == b3 && (!b2 || !b4
3181 || (remainder & 0x00ff0000 & ~result)))
3183 result = remainder & 0xff00ff00;
3184 i = 24;
3187 else if (loc > 16)
3189 /* The 8-bit immediate already found clears b2 (and maybe b3)
3190 and we don't get here unless b1 is already clear, but it will
3191 leave b4 unchanged. */
3193 /* If we can clear b2 and b4 at once, then we win, since the
3194 8-bits couldn't possibly reach that far. */
3195 if (b2 == b4)
3197 result = remainder & 0x00ff00ff;
3198 i = 16;
3203 return_sequence->i[insns++] = result;
3204 remainder &= ~result;
3206 if (code == SET || code == MINUS)
3207 code = PLUS;
3209 while (remainder);
3211 return insns;
3214 /* Emit an instruction with the indicated PATTERN. If COND is
3215 non-NULL, conditionalize the execution of the instruction on COND
3216 being true. */
3218 static void
3219 emit_constant_insn (rtx cond, rtx pattern)
3221 if (cond)
3222 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3223 emit_insn (pattern);
3226 /* As above, but extra parameter GENERATE which, if clear, suppresses
3227 RTL generation. */
3229 static int
3230 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3231 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3232 int generate)
3234 int can_invert = 0;
3235 int can_negate = 0;
3236 int final_invert = 0;
3237 int i;
3238 int set_sign_bit_copies = 0;
3239 int clear_sign_bit_copies = 0;
3240 int clear_zero_bit_copies = 0;
3241 int set_zero_bit_copies = 0;
3242 int insns = 0, neg_insns, inv_insns;
3243 unsigned HOST_WIDE_INT temp1, temp2;
3244 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3245 struct four_ints *immediates;
3246 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3248 /* Find out which operations are safe for a given CODE. Also do a quick
3249 check for degenerate cases; these can occur when DImode operations
3250 are split. */
3251 switch (code)
3253 case SET:
3254 can_invert = 1;
3255 break;
3257 case PLUS:
3258 can_negate = 1;
3259 break;
3261 case IOR:
3262 if (remainder == 0xffffffff)
3264 if (generate)
3265 emit_constant_insn (cond,
3266 gen_rtx_SET (VOIDmode, target,
3267 GEN_INT (ARM_SIGN_EXTEND (val))));
3268 return 1;
3271 if (remainder == 0)
3273 if (reload_completed && rtx_equal_p (target, source))
3274 return 0;
3276 if (generate)
3277 emit_constant_insn (cond,
3278 gen_rtx_SET (VOIDmode, target, source));
3279 return 1;
3281 break;
3283 case AND:
3284 if (remainder == 0)
3286 if (generate)
3287 emit_constant_insn (cond,
3288 gen_rtx_SET (VOIDmode, target, const0_rtx));
3289 return 1;
3291 if (remainder == 0xffffffff)
3293 if (reload_completed && rtx_equal_p (target, source))
3294 return 0;
3295 if (generate)
3296 emit_constant_insn (cond,
3297 gen_rtx_SET (VOIDmode, target, source));
3298 return 1;
3300 can_invert = 1;
3301 break;
3303 case XOR:
3304 if (remainder == 0)
3306 if (reload_completed && rtx_equal_p (target, source))
3307 return 0;
3308 if (generate)
3309 emit_constant_insn (cond,
3310 gen_rtx_SET (VOIDmode, target, source));
3311 return 1;
3314 if (remainder == 0xffffffff)
3316 if (generate)
3317 emit_constant_insn (cond,
3318 gen_rtx_SET (VOIDmode, target,
3319 gen_rtx_NOT (mode, source)));
3320 return 1;
3322 final_invert = 1;
3323 break;
3325 case MINUS:
3326 /* We treat MINUS as (val - source), since (source - val) is always
3327 passed as (source + (-val)). */
3328 if (remainder == 0)
3330 if (generate)
3331 emit_constant_insn (cond,
3332 gen_rtx_SET (VOIDmode, target,
3333 gen_rtx_NEG (mode, source)));
3334 return 1;
3336 if (const_ok_for_arm (val))
3338 if (generate)
3339 emit_constant_insn (cond,
3340 gen_rtx_SET (VOIDmode, target,
3341 gen_rtx_MINUS (mode, GEN_INT (val),
3342 source)));
3343 return 1;
3346 break;
3348 default:
3349 gcc_unreachable ();
3352 /* If we can do it in one insn get out quickly. */
3353 if (const_ok_for_op (val, code))
3355 if (generate)
3356 emit_constant_insn (cond,
3357 gen_rtx_SET (VOIDmode, target,
3358 (source
3359 ? gen_rtx_fmt_ee (code, mode, source,
3360 GEN_INT (val))
3361 : GEN_INT (val))));
3362 return 1;
3365 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3366 insn. */
3367 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3368 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3370 if (generate)
3372 if (mode == SImode && i == 16)
3373 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3374 smaller insn. */
3375 emit_constant_insn (cond,
3376 gen_zero_extendhisi2
3377 (target, gen_lowpart (HImode, source)));
3378 else
3379 /* Extz only supports SImode, but we can coerce the operands
3380 into that mode. */
3381 emit_constant_insn (cond,
3382 gen_extzv_t2 (gen_lowpart (SImode, target),
3383 gen_lowpart (SImode, source),
3384 GEN_INT (i), const0_rtx));
3387 return 1;
3390 /* Calculate a few attributes that may be useful for specific
3391 optimizations. */
3392 /* Count number of leading zeros. */
3393 for (i = 31; i >= 0; i--)
3395 if ((remainder & (1 << i)) == 0)
3396 clear_sign_bit_copies++;
3397 else
3398 break;
3401 /* Count number of leading 1's. */
3402 for (i = 31; i >= 0; i--)
3404 if ((remainder & (1 << i)) != 0)
3405 set_sign_bit_copies++;
3406 else
3407 break;
3410 /* Count number of trailing zero's. */
3411 for (i = 0; i <= 31; i++)
3413 if ((remainder & (1 << i)) == 0)
3414 clear_zero_bit_copies++;
3415 else
3416 break;
3419 /* Count number of trailing 1's. */
3420 for (i = 0; i <= 31; i++)
3422 if ((remainder & (1 << i)) != 0)
3423 set_zero_bit_copies++;
3424 else
3425 break;
3428 switch (code)
3430 case SET:
3431 /* See if we can do this by sign_extending a constant that is known
3432 to be negative. This is a good way of doing it, since the shift
3433 may well merge into a subsequent insn. */
3434 if (set_sign_bit_copies > 1)
3436 if (const_ok_for_arm
3437 (temp1 = ARM_SIGN_EXTEND (remainder
3438 << (set_sign_bit_copies - 1))))
3440 if (generate)
3442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3443 emit_constant_insn (cond,
3444 gen_rtx_SET (VOIDmode, new_src,
3445 GEN_INT (temp1)));
3446 emit_constant_insn (cond,
3447 gen_ashrsi3 (target, new_src,
3448 GEN_INT (set_sign_bit_copies - 1)));
3450 return 2;
3452 /* For an inverted constant, we will need to set the low bits,
3453 these will be shifted out of harm's way. */
3454 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3455 if (const_ok_for_arm (~temp1))
3457 if (generate)
3459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3460 emit_constant_insn (cond,
3461 gen_rtx_SET (VOIDmode, new_src,
3462 GEN_INT (temp1)));
3463 emit_constant_insn (cond,
3464 gen_ashrsi3 (target, new_src,
3465 GEN_INT (set_sign_bit_copies - 1)));
3467 return 2;
3471 /* See if we can calculate the value as the difference between two
3472 valid immediates. */
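/* Worked example (on a core without movw): 0xfff1 is not a valid
   immediate, but it is the difference of two that are, 0x10000 - 0xf,
   so the code below emits roughly
        mov     rT, #0x10000
        sub     rD, rT, #15
   (the add of -15 is printed as a sub; register names illustrative).  */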
3473 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3475 int topshift = clear_sign_bit_copies & ~1;
3477 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3478 & (0xff000000 >> topshift));
3480 /* If temp1 is zero, then that means the 9 most significant
3481 bits of remainder were 1 and we've caused it to overflow.
3482 When topshift is 0 we don't need to do anything since we
3483 can borrow from 'bit 32'. */
3484 if (temp1 == 0 && topshift != 0)
3485 temp1 = 0x80000000 >> (topshift - 1);
3487 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3489 if (const_ok_for_arm (temp2))
3491 if (generate)
3493 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3494 emit_constant_insn (cond,
3495 gen_rtx_SET (VOIDmode, new_src,
3496 GEN_INT (temp1)));
3497 emit_constant_insn (cond,
3498 gen_addsi3 (target, new_src,
3499 GEN_INT (-temp2)));
3502 return 2;
3506 /* See if we can generate this by setting the bottom (or the top)
3507 16 bits, and then shifting these into the other half of the
3508 word. We only look for the simplest cases; to do more would cost
3509 too much. Be careful, however, not to generate this when the
3510 alternative would take fewer insns. */
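/* For example (without movw/movt), 0x12341234 can be built by forming
   0x1234 in a scratch register and ORing it with itself shifted left
   by 16, roughly
        mov     rT, #0x1200
        orr     rT, rT, #0x34
        orr     rD, rT, rT, lsl #16
   (register names illustrative).  */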
3511 if (val & 0xffff0000)
3513 temp1 = remainder & 0xffff0000;
3514 temp2 = remainder & 0x0000ffff;
3516 /* Overlaps outside this range are best done using other methods. */
3517 for (i = 9; i < 24; i++)
3519 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3520 && !const_ok_for_arm (temp2))
3522 rtx new_src = (subtargets
3523 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3524 : target);
3525 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3526 source, subtargets, generate);
3527 source = new_src;
3528 if (generate)
3529 emit_constant_insn
3530 (cond,
3531 gen_rtx_SET
3532 (VOIDmode, target,
3533 gen_rtx_IOR (mode,
3534 gen_rtx_ASHIFT (mode, source,
3535 GEN_INT (i)),
3536 source)));
3537 return insns + 1;
3541 /* Don't duplicate cases already considered. */
3542 for (i = 17; i < 24; i++)
3544 if (((temp1 | (temp1 >> i)) == remainder)
3545 && !const_ok_for_arm (temp1))
3547 rtx new_src = (subtargets
3548 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3549 : target);
3550 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3551 source, subtargets, generate);
3552 source = new_src;
3553 if (generate)
3554 emit_constant_insn
3555 (cond,
3556 gen_rtx_SET (VOIDmode, target,
3557 gen_rtx_IOR
3558 (mode,
3559 gen_rtx_LSHIFTRT (mode, source,
3560 GEN_INT (i)),
3561 source)));
3562 return insns + 1;
3566 break;
3568 case IOR:
3569 case XOR:
3570 /* If we have IOR or XOR, and the constant can be loaded in a
3571 single instruction, and we can find a temporary to put it in,
3572 then this can be done in two instructions instead of 3-4. */
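/* For example, "x ^= 0xffffff01": the constant itself is not a valid
   immediate, but its complement 0xfe is, so with a scratch register
   this becomes roughly
        mvn     rT, #0xfe
        eor     rD, rY, rT
   rather than building the constant piecemeal.  */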
3573 if (subtargets
3574 /* TARGET can't be NULL if SUBTARGETS is 0. */
3575 || (reload_completed && !reg_mentioned_p (target, source)))
3577 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3579 if (generate)
3581 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3583 emit_constant_insn (cond,
3584 gen_rtx_SET (VOIDmode, sub,
3585 GEN_INT (val)));
3586 emit_constant_insn (cond,
3587 gen_rtx_SET (VOIDmode, target,
3588 gen_rtx_fmt_ee (code, mode,
3589 source, sub)));
3591 return 2;
3595 if (code == XOR)
3596 break;
3598 /* Convert.
3599 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3600 and the remaining bits 0, e.g. 0xfff00000)
3601 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3603 This can be done in 2 instructions by using shifts with mov or mvn.
3604 e.g. for
3605 x = x | 0xfff00000;
3606 we generate.
3607 mvn r0, r0, asl #12
3608 mvn r0, r0, lsr #12 */
3609 if (set_sign_bit_copies > 8
3610 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3612 if (generate)
3614 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3615 rtx shift = GEN_INT (set_sign_bit_copies);
3617 emit_constant_insn
3618 (cond,
3619 gen_rtx_SET (VOIDmode, sub,
3620 gen_rtx_NOT (mode,
3621 gen_rtx_ASHIFT (mode,
3622 source,
3623 shift))));
3624 emit_constant_insn
3625 (cond,
3626 gen_rtx_SET (VOIDmode, target,
3627 gen_rtx_NOT (mode,
3628 gen_rtx_LSHIFTRT (mode, sub,
3629 shift))));
3631 return 2;
3634 /* Convert
3635 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3637 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3639 For eg. r0 = r0 | 0xfff
3640 mvn r0, r0, lsr #12
3641 mvn r0, r0, asl #12 */
3644 if (set_zero_bit_copies > 8
3645 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3647 if (generate)
3649 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3650 rtx shift = GEN_INT (set_zero_bit_copies);
3652 emit_constant_insn
3653 (cond,
3654 gen_rtx_SET (VOIDmode, sub,
3655 gen_rtx_NOT (mode,
3656 gen_rtx_LSHIFTRT (mode,
3657 source,
3658 shift))));
3659 emit_constant_insn
3660 (cond,
3661 gen_rtx_SET (VOIDmode, target,
3662 gen_rtx_NOT (mode,
3663 gen_rtx_ASHIFT (mode, sub,
3664 shift))));
3666 return 2;
3669 /* This will never be reached for Thumb2 because orn is a valid
3670 instruction. This is for Thumb1 and the ARM 32 bit cases.
3672 x = y | constant (such that ~constant is a valid constant)
3673 Transform this to
3674 x = ~(~y & ~constant). */
3676 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3678 if (generate)
3680 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3681 emit_constant_insn (cond,
3682 gen_rtx_SET (VOIDmode, sub,
3683 gen_rtx_NOT (mode, source)));
3684 source = sub;
3685 if (subtargets)
3686 sub = gen_reg_rtx (mode);
3687 emit_constant_insn (cond,
3688 gen_rtx_SET (VOIDmode, sub,
3689 gen_rtx_AND (mode, source,
3690 GEN_INT (temp1))));
3691 emit_constant_insn (cond,
3692 gen_rtx_SET (VOIDmode, target,
3693 gen_rtx_NOT (mode, sub)));
3695 return 3;
3697 break;
3699 case AND:
3700 /* See if two shifts will do 2 or more insn's worth of work. */
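/* For example, in ARM state on a core without UBFX, "x & 0x00007fff"
   (17 clear bits at the top) can be done with a shift pair instead of
   materializing the mask, roughly
        mov     rT, rS, lsl #17
        mov     rD, rT, lsr #17
   (register names illustrative).  */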
3701 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3703 HOST_WIDE_INT shift_mask = ((0xffffffff
3704 << (32 - clear_sign_bit_copies))
3705 & 0xffffffff);
3707 if ((remainder | shift_mask) != 0xffffffff)
3709 if (generate)
3711 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3712 insns = arm_gen_constant (AND, mode, cond,
3713 remainder | shift_mask,
3714 new_src, source, subtargets, 1);
3715 source = new_src;
3717 else
3719 rtx targ = subtargets ? NULL_RTX : target;
3720 insns = arm_gen_constant (AND, mode, cond,
3721 remainder | shift_mask,
3722 targ, source, subtargets, 0);
3726 if (generate)
3728 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3729 rtx shift = GEN_INT (clear_sign_bit_copies);
3731 emit_insn (gen_ashlsi3 (new_src, source, shift));
3732 emit_insn (gen_lshrsi3 (target, new_src, shift));
3735 return insns + 2;
3738 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3740 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3742 if ((remainder | shift_mask) != 0xffffffff)
3744 if (generate)
3746 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3748 insns = arm_gen_constant (AND, mode, cond,
3749 remainder | shift_mask,
3750 new_src, source, subtargets, 1);
3751 source = new_src;
3753 else
3755 rtx targ = subtargets ? NULL_RTX : target;
3757 insns = arm_gen_constant (AND, mode, cond,
3758 remainder | shift_mask,
3759 targ, source, subtargets, 0);
3763 if (generate)
3765 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3766 rtx shift = GEN_INT (clear_zero_bit_copies);
3768 emit_insn (gen_lshrsi3 (new_src, source, shift));
3769 emit_insn (gen_ashlsi3 (target, new_src, shift));
3772 return insns + 2;
3775 break;
3777 default:
3778 break;
3781 /* Calculate what the instruction sequences would be if we generated it
3782 normally, negated, or inverted. */
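/* For example, adding 0xfffefff0 directly would need four immediate
   chunks, but its negation 0x10010 splits into just two (0x10000 and
   0x10), so the negated sequence is chosen and the addition is emitted
   as two subtracts.  */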
3783 if (code == AND)
3784 /* AND cannot be split into multiple insns, so invert and use BIC. */
3785 insns = 99;
3786 else
3787 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3789 if (can_negate)
3790 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3791 &neg_immediates);
3792 else
3793 neg_insns = 99;
3795 if (can_invert || final_invert)
3796 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3797 &inv_immediates);
3798 else
3799 inv_insns = 99;
3801 immediates = &pos_immediates;
3803 /* Is the negated immediate sequence more efficient? */
3804 if (neg_insns < insns && neg_insns <= inv_insns)
3806 insns = neg_insns;
3807 immediates = &neg_immediates;
3809 else
3810 can_negate = 0;
3812 /* Is the inverted immediate sequence more efficient?
3813 We must allow for an extra NOT instruction for XOR operations, although
3814 there is some chance that the final 'mvn' will get optimized later. */
3815 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3817 insns = inv_insns;
3818 immediates = &inv_immediates;
3820 else
3822 can_invert = 0;
3823 final_invert = 0;
3826 /* Now output the chosen sequence as instructions. */
3827 if (generate)
3829 for (i = 0; i < insns; i++)
3831 rtx new_src, temp1_rtx;
3833 temp1 = immediates->i[i];
3835 if (code == SET || code == MINUS)
3836 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3837 else if ((final_invert || i < (insns - 1)) && subtargets)
3838 new_src = gen_reg_rtx (mode);
3839 else
3840 new_src = target;
3842 if (can_invert)
3843 temp1 = ~temp1;
3844 else if (can_negate)
3845 temp1 = -temp1;
3847 temp1 = trunc_int_for_mode (temp1, mode);
3848 temp1_rtx = GEN_INT (temp1);
3850 if (code == SET)
3852 else if (code == MINUS)
3853 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3854 else
3855 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3857 emit_constant_insn (cond,
3858 gen_rtx_SET (VOIDmode, new_src,
3859 temp1_rtx));
3860 source = new_src;
3862 if (code == SET)
3864 can_negate = can_invert;
3865 can_invert = 0;
3866 code = PLUS;
3868 else if (code == MINUS)
3869 code = PLUS;
3873 if (final_invert)
3875 if (generate)
3876 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3877 gen_rtx_NOT (mode, source)));
3878 insns++;
3881 return insns;
3884 /* Canonicalize a comparison so that we are more likely to recognize it.
3885 This can be done for a few constant compares, where we can make the
3886 immediate value easier to load. */
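/* For example, "x > 0xfff" is rewritten as "x >= 0x1000": 0xfff is not
   a valid immediate for cmp, but 0x1000 is, so the comparison can be a
   single instruction.  */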
3888 static void
3889 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3890 bool op0_preserve_value)
3892 enum machine_mode mode;
3893 unsigned HOST_WIDE_INT i, maxval;
3895 mode = GET_MODE (*op0);
3896 if (mode == VOIDmode)
3897 mode = GET_MODE (*op1);
3899 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3901 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3902 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3903 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3904 for GTU/LEU in Thumb mode. */
3905 if (mode == DImode)
3907 rtx tem;
3909 if (*code == GT || *code == LE
3910 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3912 /* Missing comparison. First try to use an available
3913 comparison. */
3914 if (CONST_INT_P (*op1))
3916 i = INTVAL (*op1);
3917 switch (*code)
3919 case GT:
3920 case LE:
3921 if (i != maxval
3922 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3924 *op1 = GEN_INT (i + 1);
3925 *code = *code == GT ? GE : LT;
3926 return;
3928 break;
3929 case GTU:
3930 case LEU:
3931 if (i != ~((unsigned HOST_WIDE_INT) 0)
3932 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3934 *op1 = GEN_INT (i + 1);
3935 *code = *code == GTU ? GEU : LTU;
3936 return;
3938 break;
3939 default:
3940 gcc_unreachable ();
3944 /* If that did not work, reverse the condition. */
3945 if (!op0_preserve_value)
3947 tem = *op0;
3948 *op0 = *op1;
3949 *op1 = tem;
3950 *code = (int)swap_condition ((enum rtx_code)*code);
3953 return;
3956 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3957 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3958 to facilitate possible combining with a cmp into 'ands'. */
3959 if (mode == SImode
3960 && GET_CODE (*op0) == ZERO_EXTEND
3961 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3962 && GET_MODE (XEXP (*op0, 0)) == QImode
3963 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3964 && subreg_lowpart_p (XEXP (*op0, 0))
3965 && *op1 == const0_rtx)
3966 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3967 GEN_INT (255));
3969 /* Comparisons smaller than DImode. Only adjust comparisons against
3970 an out-of-range constant. */
3971 if (!CONST_INT_P (*op1)
3972 || const_ok_for_arm (INTVAL (*op1))
3973 || const_ok_for_arm (- INTVAL (*op1)))
3974 return;
3976 i = INTVAL (*op1);
3978 switch (*code)
3980 case EQ:
3981 case NE:
3982 return;
3984 case GT:
3985 case LE:
3986 if (i != maxval
3987 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3989 *op1 = GEN_INT (i + 1);
3990 *code = *code == GT ? GE : LT;
3991 return;
3993 break;
3995 case GE:
3996 case LT:
3997 if (i != ~maxval
3998 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4000 *op1 = GEN_INT (i - 1);
4001 *code = *code == GE ? GT : LE;
4002 return;
4004 break;
4006 case GTU:
4007 case LEU:
4008 if (i != ~((unsigned HOST_WIDE_INT) 0)
4009 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4011 *op1 = GEN_INT (i + 1);
4012 *code = *code == GTU ? GEU : LTU;
4013 return;
4015 break;
4017 case GEU:
4018 case LTU:
4019 if (i != 0
4020 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4022 *op1 = GEN_INT (i - 1);
4023 *code = *code == GEU ? GTU : LEU;
4024 return;
4026 break;
4028 default:
4029 gcc_unreachable ();
4034 /* Define how to find the value returned by a function. */
4036 static rtx
4037 arm_function_value(const_tree type, const_tree func,
4038 bool outgoing ATTRIBUTE_UNUSED)
4040 enum machine_mode mode;
4041 int unsignedp ATTRIBUTE_UNUSED;
4042 rtx r ATTRIBUTE_UNUSED;
4044 mode = TYPE_MODE (type);
4046 if (TARGET_AAPCS_BASED)
4047 return aapcs_allocate_return_reg (mode, type, func);
4049 /* Promote integer types. */
4050 if (INTEGRAL_TYPE_P (type))
4051 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4053 /* Promotes small structs returned in a register to full-word size
4054 for big-endian AAPCS. */
4055 if (arm_return_in_msb (type))
4057 HOST_WIDE_INT size = int_size_in_bytes (type);
4058 if (size % UNITS_PER_WORD != 0)
4060 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4061 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4065 return arm_libcall_value_1 (mode);
4068 /* libcall hashtable helpers. */
4070 struct libcall_hasher : typed_noop_remove <rtx_def>
4072 typedef rtx_def value_type;
4073 typedef rtx_def compare_type;
4074 static inline hashval_t hash (const value_type *);
4075 static inline bool equal (const value_type *, const compare_type *);
4076 static inline void remove (value_type *);
4079 inline bool
4080 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4082 return rtx_equal_p (p1, p2);
4085 inline hashval_t
4086 libcall_hasher::hash (const value_type *p1)
4088 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4091 typedef hash_table <libcall_hasher> libcall_table_type;
4093 static void
4094 add_libcall (libcall_table_type htab, rtx libcall)
4096 *htab.find_slot (libcall, INSERT) = libcall;
4099 static bool
4100 arm_libcall_uses_aapcs_base (const_rtx libcall)
4102 static bool init_done = false;
4103 static libcall_table_type libcall_htab;
4105 if (!init_done)
4107 init_done = true;
4109 libcall_htab.create (31);
4110 add_libcall (libcall_htab,
4111 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4112 add_libcall (libcall_htab,
4113 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4114 add_libcall (libcall_htab,
4115 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4116 add_libcall (libcall_htab,
4117 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4119 add_libcall (libcall_htab,
4120 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4121 add_libcall (libcall_htab,
4122 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4123 add_libcall (libcall_htab,
4124 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4125 add_libcall (libcall_htab,
4126 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4128 add_libcall (libcall_htab,
4129 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4130 add_libcall (libcall_htab,
4131 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4132 add_libcall (libcall_htab,
4133 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4134 add_libcall (libcall_htab,
4135 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4136 add_libcall (libcall_htab,
4137 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4138 add_libcall (libcall_htab,
4139 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4140 add_libcall (libcall_htab,
4141 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4142 add_libcall (libcall_htab,
4143 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4145 /* Values from double-precision helper functions are returned in core
4146 registers if the selected core only supports single-precision
4147 arithmetic, even if we are using the hard-float ABI. The same is
4148 true for single-precision helpers, but we will never be using the
4149 hard-float ABI on a CPU which doesn't support single-precision
4150 operations in hardware. */
4151 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4152 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4153 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4154 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4155 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4156 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4157 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4158 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4159 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4160 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4161 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4162 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4163 SFmode));
4164 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4165 DFmode));
4168 return libcall && libcall_htab.find (libcall) != NULL;
4171 static rtx
4172 arm_libcall_value_1 (enum machine_mode mode)
4174 if (TARGET_AAPCS_BASED)
4175 return aapcs_libcall_value (mode);
4176 else if (TARGET_IWMMXT_ABI
4177 && arm_vector_mode_supported_p (mode))
4178 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4179 else
4180 return gen_rtx_REG (mode, ARG_REGISTER (1));
4183 /* Define how to find the value returned by a library function
4184 assuming the value has mode MODE. */
4186 static rtx
4187 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4189 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4190 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4192 /* The following libcalls return their result in integer registers,
4193 even though they return a floating point value. */
4194 if (arm_libcall_uses_aapcs_base (libcall))
4195 return gen_rtx_REG (mode, ARG_REGISTER(1));
4199 return arm_libcall_value_1 (mode);
4202 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4204 static bool
4205 arm_function_value_regno_p (const unsigned int regno)
4207 if (regno == ARG_REGISTER (1)
4208 || (TARGET_32BIT
4209 && TARGET_AAPCS_BASED
4210 && TARGET_VFP
4211 && TARGET_HARD_FLOAT
4212 && regno == FIRST_VFP_REGNUM)
4213 || (TARGET_IWMMXT_ABI
4214 && regno == FIRST_IWMMXT_REGNUM))
4215 return true;
4217 return false;
4220 /* Determine the amount of memory needed to store the possible return
4221 registers of an untyped call. */
4223 arm_apply_result_size (void)
4225 int size = 16;
4227 if (TARGET_32BIT)
4229 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4230 size += 32;
4231 if (TARGET_IWMMXT_ABI)
4232 size += 8;
4235 return size;
4238 /* Decide whether TYPE should be returned in memory (true)
4239 or in a register (false). FNTYPE is the type of the function making
4240 the call. */
4241 static bool
4242 arm_return_in_memory (const_tree type, const_tree fntype)
4244 HOST_WIDE_INT size;
4246 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4248 if (TARGET_AAPCS_BASED)
4250 /* Simple, non-aggregate types (i.e. not including vectors and
4251 complex) are always returned in a register (or registers).
4252 We don't care about which register here, so we can short-cut
4253 some of the detail. */
4254 if (!AGGREGATE_TYPE_P (type)
4255 && TREE_CODE (type) != VECTOR_TYPE
4256 && TREE_CODE (type) != COMPLEX_TYPE)
4257 return false;
4259 /* Any return value that is no larger than one word can be
4260 returned in r0. */
4261 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4262 return false;
4264 /* Check any available co-processors to see if they accept the
4265 type as a register candidate (VFP, for example, can return
4266 some aggregates in consecutive registers). These aren't
4267 available if the call is variadic. */
4268 if (aapcs_select_return_coproc (type, fntype) >= 0)
4269 return false;
4271 /* Vector values should be returned using ARM registers, not
4272 memory (unless they're over 16 bytes, which will break since
4273 we only have four call-clobbered registers to play with). */
4274 if (TREE_CODE (type) == VECTOR_TYPE)
4275 return (size < 0 || size > (4 * UNITS_PER_WORD));
4277 /* The rest go in memory. */
4278 return true;
4281 if (TREE_CODE (type) == VECTOR_TYPE)
4282 return (size < 0 || size > (4 * UNITS_PER_WORD));
4284 if (!AGGREGATE_TYPE_P (type) &&
4285 (TREE_CODE (type) != VECTOR_TYPE))
4286 /* All simple types are returned in registers. */
4287 return false;
4289 if (arm_abi != ARM_ABI_APCS)
4291 /* ATPCS and later return aggregate types in memory only if they are
4292 larger than a word (or are variable size). */
4293 return (size < 0 || size > UNITS_PER_WORD);
4296 /* For the arm-wince targets we choose to be compatible with Microsoft's
4297 ARM and Thumb compilers, which always return aggregates in memory. */
4298 #ifndef ARM_WINCE
4299 /* All structures/unions bigger than one word are returned in memory.
4300 Also catch the case where int_size_in_bytes returns -1. In this case
4301 the aggregate is either huge or of variable size, and in either case
4302 we will want to return it via memory and not in a register. */
4303 if (size < 0 || size > UNITS_PER_WORD)
4304 return true;
4306 if (TREE_CODE (type) == RECORD_TYPE)
4308 tree field;
4310 /* For a struct the APCS says that we only return in a register
4311 if the type is 'integer like' and every addressable element
4312 has an offset of zero. For practical purposes this means
4313 that the structure can have at most one non bit-field element
4314 and that this element must be the first one in the structure. */
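/* For example, under these APCS rules a one-word struct such as
   "struct { short s; }" or "struct { int a : 8; int b : 24; }" comes
   back in r0, whereas "struct { float f; }" or
   "struct { char a; char b; }" (an addressable second field) is
   returned in memory.  */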
4316 /* Find the first field, ignoring non FIELD_DECL things which will
4317 have been created by C++. */
4318 for (field = TYPE_FIELDS (type);
4319 field && TREE_CODE (field) != FIELD_DECL;
4320 field = DECL_CHAIN (field))
4321 continue;
4323 if (field == NULL)
4324 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4326 /* Check that the first field is valid for returning in a register. */
4328 /* ... Floats are not allowed */
4329 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4330 return true;
4332 /* ... Aggregates that are not themselves valid for returning in
4333 a register are not allowed. */
4334 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4335 return true;
4337 /* Now check the remaining fields, if any. Only bitfields are allowed,
4338 since they are not addressable. */
4339 for (field = DECL_CHAIN (field);
4340 field;
4341 field = DECL_CHAIN (field))
4343 if (TREE_CODE (field) != FIELD_DECL)
4344 continue;
4346 if (!DECL_BIT_FIELD_TYPE (field))
4347 return true;
4350 return false;
4353 if (TREE_CODE (type) == UNION_TYPE)
4355 tree field;
4357 /* Unions can be returned in registers if every element is
4358 integral, or can be returned in an integer register. */
4359 for (field = TYPE_FIELDS (type);
4360 field;
4361 field = DECL_CHAIN (field))
4363 if (TREE_CODE (field) != FIELD_DECL)
4364 continue;
4366 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4367 return true;
4369 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4370 return true;
4373 return false;
4375 #endif /* not ARM_WINCE */
4377 /* Return all other types in memory. */
4378 return true;
4381 const struct pcs_attribute_arg
4383 const char *arg;
4384 enum arm_pcs value;
4385 } pcs_attribute_args[] =
4387 {"aapcs", ARM_PCS_AAPCS},
4388 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4389 #if 0
4390 /* We could recognize these, but changes would be needed elsewhere
4391 * to implement them. */
4392 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4393 {"atpcs", ARM_PCS_ATPCS},
4394 {"apcs", ARM_PCS_APCS},
4395 #endif
4396 {NULL, ARM_PCS_UNKNOWN}
4399 static enum arm_pcs
4400 arm_pcs_from_attribute (tree attr)
4402 const struct pcs_attribute_arg *ptr;
4403 const char *arg;
4405 /* Get the value of the argument. */
4406 if (TREE_VALUE (attr) == NULL_TREE
4407 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4408 return ARM_PCS_UNKNOWN;
4410 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4412 /* Check it against the list of known arguments. */
4413 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4414 if (streq (arg, ptr->arg))
4415 return ptr->value;
4417 /* An unrecognized PCS variant. */
4418 return ARM_PCS_UNKNOWN;
4421 /* Get the PCS variant to use for this call. TYPE is the function's type
4422 specification, DECL is the specific declaration. DECL may be null if
4423 the call could be indirect or if this is a library call. */
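/* For example, a declaration such as
     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   selects the VFP variant for calls made through that type, while
   pcs ("aapcs") selects the base, core-register convention
   (illustrative declaration only).  */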
4424 static enum arm_pcs
4425 arm_get_pcs_model (const_tree type, const_tree decl)
4427 bool user_convention = false;
4428 enum arm_pcs user_pcs = arm_pcs_default;
4429 tree attr;
4431 gcc_assert (type);
4433 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4434 if (attr)
4436 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4437 user_convention = true;
4440 if (TARGET_AAPCS_BASED)
4442 /* Detect varargs functions. These always use the base rules
4443 (no argument is ever a candidate for a co-processor
4444 register). */
4445 bool base_rules = stdarg_p (type);
4447 if (user_convention)
4449 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4450 sorry ("non-AAPCS derived PCS variant");
4451 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4452 error ("variadic functions must use the base AAPCS variant");
4455 if (base_rules)
4456 return ARM_PCS_AAPCS;
4457 else if (user_convention)
4458 return user_pcs;
4459 else if (decl && flag_unit_at_a_time)
4461 /* Local functions never leak outside this compilation unit,
4462 so we are free to use whatever conventions are
4463 appropriate. */
4464 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4465 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4466 if (i && i->local)
4467 return ARM_PCS_AAPCS_LOCAL;
4470 else if (user_convention && user_pcs != arm_pcs_default)
4471 sorry ("PCS variant");
4473 /* For everything else we use the target's default. */
4474 return arm_pcs_default;
4478 static void
4479 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4480 const_tree fntype ATTRIBUTE_UNUSED,
4481 rtx libcall ATTRIBUTE_UNUSED,
4482 const_tree fndecl ATTRIBUTE_UNUSED)
4484 /* Record the unallocated VFP registers. */
4485 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4486 pcum->aapcs_vfp_reg_alloc = 0;
4489 /* Walk down the type tree of TYPE counting consecutive base elements.
4490 If *MODEP is VOIDmode, then set it to the first valid floating point
4491 type. If a non-floating point type is found, or if a floating point
4492 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4493 otherwise return the count in the sub-tree. */
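/* For example, "struct { double x, y, z; }" yields *MODEP == DFmode and
   a count of 3 (a homogeneous aggregate, a candidate for d0-d2), while
   "struct { float f; double d; }" mixes base types and yields -1.  */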
4494 static int
4495 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4497 enum machine_mode mode;
4498 HOST_WIDE_INT size;
4500 switch (TREE_CODE (type))
4502 case REAL_TYPE:
4503 mode = TYPE_MODE (type);
4504 if (mode != DFmode && mode != SFmode)
4505 return -1;
4507 if (*modep == VOIDmode)
4508 *modep = mode;
4510 if (*modep == mode)
4511 return 1;
4513 break;
4515 case COMPLEX_TYPE:
4516 mode = TYPE_MODE (TREE_TYPE (type));
4517 if (mode != DFmode && mode != SFmode)
4518 return -1;
4520 if (*modep == VOIDmode)
4521 *modep = mode;
4523 if (*modep == mode)
4524 return 2;
4526 break;
4528 case VECTOR_TYPE:
4529 /* Use V2SImode and V4SImode as representatives of all 64-bit
4530 and 128-bit vector types, whether or not those modes are
4531 supported with the present options. */
4532 size = int_size_in_bytes (type);
4533 switch (size)
4535 case 8:
4536 mode = V2SImode;
4537 break;
4538 case 16:
4539 mode = V4SImode;
4540 break;
4541 default:
4542 return -1;
4545 if (*modep == VOIDmode)
4546 *modep = mode;
4548 /* Vector modes are considered to be opaque: two vectors are
4549 equivalent for the purposes of being homogeneous aggregates
4550 if they are the same size. */
4551 if (*modep == mode)
4552 return 1;
4554 break;
4556 case ARRAY_TYPE:
4558 int count;
4559 tree index = TYPE_DOMAIN (type);
4561 /* Can't handle incomplete types. */
4562 if (!COMPLETE_TYPE_P (type))
4563 return -1;
4565 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4566 if (count == -1
4567 || !index
4568 || !TYPE_MAX_VALUE (index)
4569 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4570 || !TYPE_MIN_VALUE (index)
4571 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4572 || count < 0)
4573 return -1;
4575 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4576 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4578 /* There must be no padding. */
4579 if (!host_integerp (TYPE_SIZE (type), 1)
4580 || (tree_low_cst (TYPE_SIZE (type), 1)
4581 != count * GET_MODE_BITSIZE (*modep)))
4582 return -1;
4584 return count;
4587 case RECORD_TYPE:
4589 int count = 0;
4590 int sub_count;
4591 tree field;
4593 /* Can't handle incomplete types. */
4594 if (!COMPLETE_TYPE_P (type))
4595 return -1;
4597 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4599 if (TREE_CODE (field) != FIELD_DECL)
4600 continue;
4602 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4603 if (sub_count < 0)
4604 return -1;
4605 count += sub_count;
4608 /* There must be no padding. */
4609 if (!host_integerp (TYPE_SIZE (type), 1)
4610 || (tree_low_cst (TYPE_SIZE (type), 1)
4611 != count * GET_MODE_BITSIZE (*modep)))
4612 return -1;
4614 return count;
4617 case UNION_TYPE:
4618 case QUAL_UNION_TYPE:
4620 /* These aren't very interesting except in a degenerate case. */
4621 int count = 0;
4622 int sub_count;
4623 tree field;
4625 /* Can't handle incomplete types. */
4626 if (!COMPLETE_TYPE_P (type))
4627 return -1;
4629 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4631 if (TREE_CODE (field) != FIELD_DECL)
4632 continue;
4634 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4635 if (sub_count < 0)
4636 return -1;
4637 count = count > sub_count ? count : sub_count;
4640 /* There must be no padding. */
4641 if (!host_integerp (TYPE_SIZE (type), 1)
4642 || (tree_low_cst (TYPE_SIZE (type), 1)
4643 != count * GET_MODE_BITSIZE (*modep)))
4644 return -1;
4646 return count;
4649 default:
4650 break;
4653 return -1;
4656 /* Return true if PCS_VARIANT should use VFP registers. */
4657 static bool
4658 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4660 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4662 static bool seen_thumb1_vfp = false;
4664 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4666 sorry ("Thumb-1 hard-float VFP ABI");
4667 /* sorry() is not immediately fatal, so only display this once. */
4668 seen_thumb1_vfp = true;
4671 return true;
4674 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4675 return false;
4677 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4678 (TARGET_VFP_DOUBLE || !is_double));
4681 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4682 suitable for passing or returning in VFP registers for the PCS
4683 variant selected. If it is, then *BASE_MODE is updated to contain
4684 a machine mode describing each element of the argument's type and
4685 *COUNT to hold the number of such elements. */
4686 static bool
4687 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4688 enum machine_mode mode, const_tree type,
4689 enum machine_mode *base_mode, int *count)
4691 enum machine_mode new_mode = VOIDmode;
4693 /* If we have the type information, prefer that to working things
4694 out from the mode. */
4695 if (type)
4697 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4699 if (ag_count > 0 && ag_count <= 4)
4700 *count = ag_count;
4701 else
4702 return false;
4704 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4705 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4706 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4708 *count = 1;
4709 new_mode = mode;
4711 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4713 *count = 2;
4714 new_mode = (mode == DCmode ? DFmode : SFmode);
4716 else
4717 return false;
4720 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4721 return false;
4723 *base_mode = new_mode;
4724 return true;
4727 static bool
4728 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4729 enum machine_mode mode, const_tree type)
4731 int count ATTRIBUTE_UNUSED;
4732 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4734 if (!use_vfp_abi (pcs_variant, false))
4735 return false;
4736 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4737 &ag_mode, &count);
4740 static bool
4741 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4742 const_tree type)
4744 if (!use_vfp_abi (pcum->pcs_variant, false))
4745 return false;
4747 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4748 &pcum->aapcs_vfp_rmode,
4749 &pcum->aapcs_vfp_rcount);
4752 static bool
4753 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4754 const_tree type ATTRIBUTE_UNUSED)
4756 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4757 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4758 int regno;
4760 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4761 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4763 pcum->aapcs_vfp_reg_alloc = mask << regno;
4764 if (mode == BLKmode
4765 || (mode == TImode && ! TARGET_NEON)
4766 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4768 int i;
4769 int rcount = pcum->aapcs_vfp_rcount;
4770 int rshift = shift;
4771 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4772 rtx par;
4773 if (!TARGET_NEON)
4775 /* Avoid using unsupported vector modes. */
4776 if (rmode == V2SImode)
4777 rmode = DImode;
4778 else if (rmode == V4SImode)
4780 rmode = DImode;
4781 rcount *= 2;
4782 rshift /= 2;
4785 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4786 for (i = 0; i < rcount; i++)
4788 rtx tmp = gen_rtx_REG (rmode,
4789 FIRST_VFP_REGNUM + regno + i * rshift);
4790 tmp = gen_rtx_EXPR_LIST
4791 (VOIDmode, tmp,
4792 GEN_INT (i * GET_MODE_SIZE (rmode)));
4793 XVECEXP (par, 0, i) = tmp;
4796 pcum->aapcs_reg = par;
4798 else
4799 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4800 return true;
4802 return false;
4805 static rtx
4806 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4807 enum machine_mode mode,
4808 const_tree type ATTRIBUTE_UNUSED)
4810 if (!use_vfp_abi (pcs_variant, false))
4811 return NULL;
4813 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4815 int count;
4816 enum machine_mode ag_mode;
4817 int i;
4818 rtx par;
4819 int shift;
4821 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4822 &ag_mode, &count);
4824 if (!TARGET_NEON)
4826 if (ag_mode == V2SImode)
4827 ag_mode = DImode;
4828 else if (ag_mode == V4SImode)
4830 ag_mode = DImode;
4831 count *= 2;
4834 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4835 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4836 for (i = 0; i < count; i++)
4838 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4839 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4840 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4841 XVECEXP (par, 0, i) = tmp;
4844 return par;
4847 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4850 static void
4851 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4852 enum machine_mode mode ATTRIBUTE_UNUSED,
4853 const_tree type ATTRIBUTE_UNUSED)
4855 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4856 pcum->aapcs_vfp_reg_alloc = 0;
4857 return;
4860 #define AAPCS_CP(X) \
4862 aapcs_ ## X ## _cum_init, \
4863 aapcs_ ## X ## _is_call_candidate, \
4864 aapcs_ ## X ## _allocate, \
4865 aapcs_ ## X ## _is_return_candidate, \
4866 aapcs_ ## X ## _allocate_return_reg, \
4867 aapcs_ ## X ## _advance \
4870 /* Table of co-processors that can be used to pass arguments in
4871 registers. Ideally no argument should be a candidate for more than
4872 one co-processor table entry, but the table is processed in order
4873 and stops after the first match. If that entry then fails to put
4874 the argument into a co-processor register, the argument will go on
4875 the stack. */
4876 static struct
4878 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4879 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4881 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4882 BLKmode) is a candidate for this co-processor's registers; this
4883 function should ignore any position-dependent state in
4884 CUMULATIVE_ARGS and only use call-type dependent information. */
4885 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4887 /* Return true if the argument does get a co-processor register; it
4888 should set aapcs_reg to an RTX of the register allocated as is
4889 required for a return from FUNCTION_ARG. */
4890 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4892 /* Return true if a result of mode MODE (or type TYPE if MODE is
4893 BLKmode) can be returned in this co-processor's registers. */
4894 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4896 /* Allocate and return an RTX element to hold the return type of a
4897 call, this routine must not fail and will only be called if
4898 is_return_candidate returned true with the same parameters. */
4899 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4901 /* Finish processing this argument and prepare to start processing
4902 the next one. */
4903 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4904 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4906 AAPCS_CP(vfp)
4909 #undef AAPCS_CP
4911 static int
4912 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4913 const_tree type)
4915 int i;
4917 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4918 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4919 return i;
4921 return -1;
4924 static int
4925 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4927 /* We aren't passed a decl, so we can't check that a call is local.
4928 However, it isn't clear that that would be a win anyway, since it
4929 might limit some tail-calling opportunities. */
4930 enum arm_pcs pcs_variant;
4932 if (fntype)
4934 const_tree fndecl = NULL_TREE;
4936 if (TREE_CODE (fntype) == FUNCTION_DECL)
4938 fndecl = fntype;
4939 fntype = TREE_TYPE (fntype);
4942 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4944 else
4945 pcs_variant = arm_pcs_default;
4947 if (pcs_variant != ARM_PCS_AAPCS)
4949 int i;
4951 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4952 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4953 TYPE_MODE (type),
4954 type))
4955 return i;
4957 return -1;
4960 static rtx
4961 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4962 const_tree fntype)
4964 /* We aren't passed a decl, so we can't check that a call is local.
4965 However, it isn't clear that that would be a win anyway, since it
4966 might limit some tail-calling opportunities. */
4967 enum arm_pcs pcs_variant;
4968 int unsignedp ATTRIBUTE_UNUSED;
4970 if (fntype)
4972 const_tree fndecl = NULL_TREE;
4974 if (TREE_CODE (fntype) == FUNCTION_DECL)
4976 fndecl = fntype;
4977 fntype = TREE_TYPE (fntype);
4980 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4982 else
4983 pcs_variant = arm_pcs_default;
4985 /* Promote integer types. */
4986 if (type && INTEGRAL_TYPE_P (type))
4987 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4989 if (pcs_variant != ARM_PCS_AAPCS)
4991 int i;
4993 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4994 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4995 type))
4996 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4997 mode, type);
5000 /* Promotes small structs returned in a register to full-word size
5001 for big-endian AAPCS. */
5002 if (type && arm_return_in_msb (type))
5004 HOST_WIDE_INT size = int_size_in_bytes (type);
5005 if (size % UNITS_PER_WORD != 0)
5007 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5008 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5012 return gen_rtx_REG (mode, R0_REGNUM);
5015 static rtx
5016 aapcs_libcall_value (enum machine_mode mode)
5018 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5019 && GET_MODE_SIZE (mode) <= 4)
5020 mode = SImode;
5022 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5025 /* Lay out a function argument using the AAPCS rules. The rule
5026 numbers referred to here are those in the AAPCS. */
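/* For example, for "void f (int a, long long b, int c)": 'a' takes r0
   (C4); 'b' needs doubleword alignment, so the NCRN is rounded up to 2
   (C3) and 'b' occupies r2/r3 (C4); 'c' then no longer fits in core
   registers and goes on the stack (C6-C8).  Illustrative only.  */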
5027 static void
5028 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5029 const_tree type, bool named)
5031 int nregs, nregs2;
5032 int ncrn;
5034 /* We only need to do this once per argument. */
5035 if (pcum->aapcs_arg_processed)
5036 return;
5038 pcum->aapcs_arg_processed = true;
5040 /* Special case: if named is false then we are handling an incoming
5041 anonymous argument which is on the stack. */
5042 if (!named)
5043 return;
5045 /* Is this a potential co-processor register candidate? */
5046 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5048 int slot = aapcs_select_call_coproc (pcum, mode, type);
5049 pcum->aapcs_cprc_slot = slot;
5051 /* We don't have to apply any of the rules from part B of the
5052 preparation phase, these are handled elsewhere in the
5053 compiler. */
5055 if (slot >= 0)
5057 /* A Co-processor register candidate goes either in its own
5058 class of registers or on the stack. */
5059 if (!pcum->aapcs_cprc_failed[slot])
5061 /* C1.cp - Try to allocate the argument to co-processor
5062 registers. */
5063 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5064 return;
5066 /* C2.cp - Put the argument on the stack and note that we
5067 can't assign any more candidates in this slot. We also
5068 need to note that we have allocated stack space, so that
5069 we won't later try to split a non-cprc candidate between
5070 core registers and the stack. */
5071 pcum->aapcs_cprc_failed[slot] = true;
5072 pcum->can_split = false;
5075 /* We didn't get a register, so this argument goes on the
5076 stack. */
5077 gcc_assert (pcum->can_split == false);
5078 return;
5082 /* C3 - For double-word aligned arguments, round the NCRN up to the
5083 next even number. */
5084 ncrn = pcum->aapcs_ncrn;
5085 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5086 ncrn++;
5088 nregs = ARM_NUM_REGS2(mode, type);
5090 /* Sigh, this test should really assert that nregs > 0, but a GCC
5091 extension allows empty structs and then gives them empty size; it
5092 then allows such a structure to be passed by value. For some of
5093 the code below we have to pretend that such an argument has
5094 non-zero size so that we 'locate' it correctly either in
5095 registers or on the stack. */
5096 gcc_assert (nregs >= 0);
5098 nregs2 = nregs ? nregs : 1;
5100 /* C4 - Argument fits entirely in core registers. */
5101 if (ncrn + nregs2 <= NUM_ARG_REGS)
5103 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5104 pcum->aapcs_next_ncrn = ncrn + nregs;
5105 return;
5108 /* C5 - Some core registers left and there are no arguments already
5109 on the stack: split this argument between the remaining core
5110 registers and the stack. */
5111 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5113 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5114 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5115 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5116 return;
5119 /* C6 - NCRN is set to 4. */
5120 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5122 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5123 return;
5126 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5127 for a call to a function whose data type is FNTYPE.
5128 For a library call, FNTYPE is NULL. */
5129 void
5130 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5131 rtx libname,
5132 tree fndecl ATTRIBUTE_UNUSED)
5134 /* Long call handling. */
5135 if (fntype)
5136 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5137 else
5138 pcum->pcs_variant = arm_pcs_default;
5140 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5142 if (arm_libcall_uses_aapcs_base (libname))
5143 pcum->pcs_variant = ARM_PCS_AAPCS;
5145 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5146 pcum->aapcs_reg = NULL_RTX;
5147 pcum->aapcs_partial = 0;
5148 pcum->aapcs_arg_processed = false;
5149 pcum->aapcs_cprc_slot = -1;
5150 pcum->can_split = true;
5152 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5154 int i;
5156 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5158 pcum->aapcs_cprc_failed[i] = false;
5159 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5162 return;
5165 /* Legacy ABIs */
5167 /* On the ARM, the offset starts at 0. */
5168 pcum->nregs = 0;
5169 pcum->iwmmxt_nregs = 0;
5170 pcum->can_split = true;
5172 /* Varargs vectors are treated the same as long long.
5173 named_count avoids having to change the way arm handles 'named'. */
5174 pcum->named_count = 0;
5175 pcum->nargs = 0;
5177 if (TARGET_REALLY_IWMMXT && fntype)
5179 tree fn_arg;
5181 for (fn_arg = TYPE_ARG_TYPES (fntype);
5182 fn_arg;
5183 fn_arg = TREE_CHAIN (fn_arg))
5184 pcum->named_count += 1;
5186 if (! pcum->named_count)
5187 pcum->named_count = INT_MAX;
5191 /* Return true if we use LRA instead of reload pass. */
5192 static bool
5193 arm_lra_p (void)
5195 return arm_lra_flag;
5198 /* Return true if mode/type need doubleword alignment. */
5199 static bool
5200 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5202 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5203 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5207 /* Determine where to put an argument to a function.
5208 Value is zero to push the argument on the stack,
5209 or a hard register in which to store the argument.
5211 MODE is the argument's machine mode.
5212 TYPE is the data type of the argument (as a tree).
5213 This is null for libcalls where that information may
5214 not be available.
5215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5216 the preceding args and about the function being called.
5217 NAMED is nonzero if this argument is a named parameter
5218 (otherwise it is an extra parameter matching an ellipsis).
5220 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5221 other arguments are passed on the stack. If (NAMED == 0) (which happens
5222 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5223 defined), say it is passed on the stack (function_prologue will
5224 indeed make it be passed on the stack if necessary). */
5226 static rtx
5227 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5228 const_tree type, bool named)
5230 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5231 int nregs;
5233 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5234 a call insn (op3 of a call_value insn). */
5235 if (mode == VOIDmode)
5236 return const0_rtx;
5238 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5240 aapcs_layout_arg (pcum, mode, type, named);
5241 return pcum->aapcs_reg;
5244 /* Varargs vectors are treated the same as long long.
5245 named_count avoids having to change the way arm handles 'named'. */
5246 if (TARGET_IWMMXT_ABI
5247 && arm_vector_mode_supported_p (mode)
5248 && pcum->named_count > pcum->nargs + 1)
5250 if (pcum->iwmmxt_nregs <= 9)
5251 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5252 else
5254 pcum->can_split = false;
5255 return NULL_RTX;
5259 /* Put doubleword aligned quantities in even register pairs. */
5260 if (pcum->nregs & 1
5261 && ARM_DOUBLEWORD_ALIGN
5262 && arm_needs_doubleword_align (mode, type))
5263 pcum->nregs++;
5265 /* Only allow splitting an arg between regs and memory if all preceding
5266 args were allocated to regs. For args passed by reference we only count
5267 the reference pointer. */
5268 if (pcum->can_split)
5269 nregs = 1;
5270 else
5271 nregs = ARM_NUM_REGS2 (mode, type);
5273 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5274 return NULL_RTX;
5276 return gen_rtx_REG (mode, pcum->nregs);
5279 static unsigned int
5280 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5282 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5283 ? DOUBLEWORD_ALIGNMENT
5284 : PARM_BOUNDARY);
5287 static int
5288 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5289 tree type, bool named)
5291 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5292 int nregs = pcum->nregs;
5294 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5296 aapcs_layout_arg (pcum, mode, type, named);
5297 return pcum->aapcs_partial;
5300 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5301 return 0;
5303 if (NUM_ARG_REGS > nregs
5304 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5305 && pcum->can_split)
5306 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5308 return 0;
5311 /* Update the data in PCUM to advance over an argument
5312 of mode MODE and data type TYPE.
5313 (TYPE is null for libcalls where that information may not be available.) */
5315 static void
5316 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5317 const_tree type, bool named)
5319 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5321 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5323 aapcs_layout_arg (pcum, mode, type, named);
5325 if (pcum->aapcs_cprc_slot >= 0)
5327 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5328 type);
5329 pcum->aapcs_cprc_slot = -1;
5332 /* Generic stuff. */
5333 pcum->aapcs_arg_processed = false;
5334 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5335 pcum->aapcs_reg = NULL_RTX;
5336 pcum->aapcs_partial = 0;
5338 else
5340 pcum->nargs += 1;
5341 if (arm_vector_mode_supported_p (mode)
5342 && pcum->named_count > pcum->nargs
5343 && TARGET_IWMMXT_ABI)
5344 pcum->iwmmxt_nregs += 1;
5345 else
5346 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5350 /* Variable sized types are passed by reference. This is a GCC
5351 extension to the ARM ABI. */
5353 static bool
5354 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5355 enum machine_mode mode ATTRIBUTE_UNUSED,
5356 const_tree type, bool named ATTRIBUTE_UNUSED)
5358 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5361 /* Encode the current state of the #pragma [no_]long_calls. */
5362 typedef enum
5364 OFF, /* No #pragma [no_]long_calls is in effect. */
5365 LONG, /* #pragma long_calls is in effect. */
5366 SHORT /* #pragma no_long_calls is in effect. */
5367 } arm_pragma_enum;
5369 static arm_pragma_enum arm_pragma_long_calls = OFF;
5371 void
5372 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5374 arm_pragma_long_calls = LONG;
5377 void
5378 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5380 arm_pragma_long_calls = SHORT;
5383 void
5384 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5386 arm_pragma_long_calls = OFF;
5389 /* Handle an attribute requiring a FUNCTION_DECL;
5390 arguments as in struct attribute_spec.handler. */
5391 static tree
5392 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5393 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5395 if (TREE_CODE (*node) != FUNCTION_DECL)
5397 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5398 name);
5399 *no_add_attrs = true;
5402 return NULL_TREE;
5405 /* Handle an "interrupt" or "isr" attribute;
5406 arguments as in struct attribute_spec.handler. */
5407 static tree
5408 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5409 bool *no_add_attrs)
5411 if (DECL_P (*node))
5413 if (TREE_CODE (*node) != FUNCTION_DECL)
5415 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5416 name);
5417 *no_add_attrs = true;
5419 /* FIXME: the argument if any is checked for type attributes;
5420 should it be checked for decl ones? */
5422 else
5424 if (TREE_CODE (*node) == FUNCTION_TYPE
5425 || TREE_CODE (*node) == METHOD_TYPE)
5427 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5429 warning (OPT_Wattributes, "%qE attribute ignored",
5430 name);
5431 *no_add_attrs = true;
5434 else if (TREE_CODE (*node) == POINTER_TYPE
5435 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5436 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5437 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5439 *node = build_variant_type_copy (*node);
5440 TREE_TYPE (*node) = build_type_attribute_variant
5441 (TREE_TYPE (*node),
5442 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5443 *no_add_attrs = true;
5445 else
5447 /* Possibly pass this attribute on from the type to a decl. */
5448 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5449 | (int) ATTR_FLAG_FUNCTION_NEXT
5450 | (int) ATTR_FLAG_ARRAY_NEXT))
5452 *no_add_attrs = true;
5453 return tree_cons (name, args, NULL_TREE);
5455 else
5457 warning (OPT_Wattributes, "%qE attribute ignored",
5458 name);
5463 return NULL_TREE;
5466 /* Handle a "pcs" attribute; arguments as in struct
5467 attribute_spec.handler. */
5468 static tree
5469 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5470 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5472 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5474 warning (OPT_Wattributes, "%qE attribute ignored", name);
5475 *no_add_attrs = true;
5477 return NULL_TREE;
5480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5481 /* Handle the "notshared" attribute. This attribute is another way of
5482 requesting hidden visibility. ARM's compiler supports
5483 "__declspec(notshared)"; we support the same thing via an
5484 attribute. */
5486 static tree
5487 arm_handle_notshared_attribute (tree *node,
5488 tree name ATTRIBUTE_UNUSED,
5489 tree args ATTRIBUTE_UNUSED,
5490 int flags ATTRIBUTE_UNUSED,
5491 bool *no_add_attrs)
5493 tree decl = TYPE_NAME (*node);
5495 if (decl)
5497 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5498 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5499 *no_add_attrs = false;
5501 return NULL_TREE;
5503 #endif
5505 /* Return 0 if the attributes for two types are incompatible, 1 if they
5506 are compatible, and 2 if they are nearly compatible (which causes a
5507 warning to be generated). */
5508 static int
5509 arm_comp_type_attributes (const_tree type1, const_tree type2)
5511 int l1, l2, s1, s2;
5513 /* Check for mismatch of non-default calling convention. */
5514 if (TREE_CODE (type1) != FUNCTION_TYPE)
5515 return 1;
5517 /* Check for mismatched call attributes. */
5518 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5519 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5520 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5521 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5523 /* Only bother to check if an attribute is defined. */
5524 if (l1 | l2 | s1 | s2)
5526 /* If one type has an attribute, the other must have the same attribute. */
5527 if ((l1 != l2) || (s1 != s2))
5528 return 0;
5530 /* Disallow mixed attributes. */
5531 if ((l1 & s2) || (l2 & s1))
5532 return 0;
5535 /* Check for mismatched ISR attribute. */
5536 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5537 if (! l1)
5538 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5539 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5540 if (! l2)
5541     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;

5542 if (l1 != l2)
5543 return 0;
5545 return 1;
5548 /* Assigns default attributes to a newly defined type.  This is used to
5549 set short_call/long_call attributes for function types of
5550 functions defined inside corresponding #pragma scopes. */
5551 static void
5552 arm_set_default_type_attributes (tree type)
5554   /* Add __attribute__ ((long_call)) to all functions when inside
5555      #pragma long_calls, or __attribute__ ((short_call)) when inside
5556      #pragma no_long_calls.  */
5557 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5559 tree type_attr_list, attr_name;
5560 type_attr_list = TYPE_ATTRIBUTES (type);
5562 if (arm_pragma_long_calls == LONG)
5563 attr_name = get_identifier ("long_call");
5564 else if (arm_pragma_long_calls == SHORT)
5565 attr_name = get_identifier ("short_call");
5566 else
5567 return;
5569 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5570 TYPE_ATTRIBUTES (type) = type_attr_list;
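/* As an illustration of the #pragma scopes this relies on (a sketch only;
   the declaration names are arbitrary):

     #pragma long_calls
     extern void far_helper (void);    /* implicitly long_call             */
     #pragma no_long_calls
     extern void near_helper (void);   /* implicitly short_call            */
     #pragma long_calls_off            /* back to the command-line default */

   arm_pragma_long_calls is assumed to track which of these scopes is
   currently active.  */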
5574 /* Return true if DECL is known to be linked into section SECTION. */
5576 static bool
5577 arm_function_in_section_p (tree decl, section *section)
5579 /* We can only be certain about functions defined in the same
5580 compilation unit. */
5581 if (!TREE_STATIC (decl))
5582 return false;
5584 /* Make sure that SYMBOL always binds to the definition in this
5585 compilation unit. */
5586 if (!targetm.binds_local_p (decl))
5587 return false;
5589 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5590 if (!DECL_SECTION_NAME (decl))
5592 /* Make sure that we will not create a unique section for DECL. */
5593 if (flag_function_sections || DECL_ONE_ONLY (decl))
5594 return false;
5597 return function_section (decl) == section;
5600 /* Return nonzero if a 32-bit "long_call" should be generated for
5601 a call from the current function to DECL. We generate a long_call
5602 if the function:
5604    a.  has an __attribute__ ((long_call))
5605 or b. is within the scope of a #pragma long_calls
5606 or c. the -mlong-calls command line switch has been specified
5608 However we do not generate a long call if the function:
5610 d. has an __attribute__ ((short_call))
5611 or e. is inside the scope of a #pragma no_long_calls
5612 or f. is defined in the same section as the current function. */
5614 bool
5615 arm_is_long_call_p (tree decl)
5617 tree attrs;
5619 if (!decl)
5620 return TARGET_LONG_CALLS;
5622 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5623 if (lookup_attribute ("short_call", attrs))
5624 return false;
5626 /* For "f", be conservative, and only cater for cases in which the
5627 whole of the current function is placed in the same section. */
5628 if (!flag_reorder_blocks_and_partition
5629 && TREE_CODE (decl) == FUNCTION_DECL
5630 && arm_function_in_section_p (decl, current_function_section ()))
5631 return false;
5633 if (lookup_attribute ("long_call", attrs))
5634 return true;
5636 return TARGET_LONG_CALLS;
5639 /* Return nonzero if it is ok to make a tail-call to DECL. */
5640 static bool
5641 arm_function_ok_for_sibcall (tree decl, tree exp)
5643 unsigned long func_type;
5645 if (cfun->machine->sibcall_blocked)
5646 return false;
5648 /* Never tailcall something if we are generating code for Thumb-1. */
5649 if (TARGET_THUMB1)
5650 return false;
5652 /* The PIC register is live on entry to VxWorks PLT entries, so we
5653 must make the call before restoring the PIC register. */
5654 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5655 return false;
5657 /* Cannot tail-call to long calls, since these are out of range of
5658 a branch instruction. */
5659 if (decl && arm_is_long_call_p (decl))
5660 return false;
5662 /* If we are interworking and the function is not declared static
5663 then we can't tail-call it unless we know that it exists in this
5664 compilation unit (since it might be a Thumb routine). */
5665 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5666 && !TREE_ASM_WRITTEN (decl))
5667 return false;
5669 func_type = arm_current_func_type ();
5670 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5671 if (IS_INTERRUPT (func_type))
5672 return false;
5674 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5676 /* Check that the return value locations are the same. For
5677 example that we aren't returning a value from the sibling in
5678 a VFP register but then need to transfer it to a core
5679 register. */
5680 rtx a, b;
5682 a = arm_function_value (TREE_TYPE (exp), decl, false);
5683 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5684 cfun->decl, false);
5685 if (!rtx_equal_p (a, b))
5686 return false;
5689 /* Never tailcall if function may be called with a misaligned SP. */
5690 if (IS_STACKALIGN (func_type))
5691 return false;
5693 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5694 references should become a NOP. Don't convert such calls into
5695 sibling calls. */
5696 if (TARGET_AAPCS_BASED
5697 && arm_abi == ARM_ABI_AAPCS
5698 && decl
5699 && DECL_WEAK (decl))
5700 return false;
5702 /* Everything else is ok. */
5703 return true;
5707 /* Addressing mode support functions. */
5709 /* Return nonzero if X is a legitimate immediate operand when compiling
5710 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5712 legitimate_pic_operand_p (rtx x)
5714 if (GET_CODE (x) == SYMBOL_REF
5715 || (GET_CODE (x) == CONST
5716 && GET_CODE (XEXP (x, 0)) == PLUS
5717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5718 return 0;
5720 return 1;
5723 /* Record that the current function needs a PIC register. Initialize
5724 cfun->machine->pic_reg if we have not already done so. */
5726 static void
5727 require_pic_register (void)
5729 /* A lot of the logic here is made obscure by the fact that this
5730 routine gets called as part of the rtx cost estimation process.
5731 We don't want those calls to affect any assumptions about the real
5732 function; and further, we can't call entry_of_function() until we
5733 start the real expansion process. */
5734 if (!crtl->uses_pic_offset_table)
5736 gcc_assert (can_create_pseudo_p ());
5737 if (arm_pic_register != INVALID_REGNUM)
5739 if (!cfun->machine->pic_reg)
5740 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5742 /* Play games to avoid marking the function as needing pic
5743 if we are being called as part of the cost-estimation
5744 process. */
5745 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5746 crtl->uses_pic_offset_table = 1;
5748 else
5750 rtx seq, insn;
5752 if (!cfun->machine->pic_reg)
5753 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5755 /* Play games to avoid marking the function as needing pic
5756 if we are being called as part of the cost-estimation
5757 process. */
5758 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5760 crtl->uses_pic_offset_table = 1;
5761 start_sequence ();
5763 arm_load_pic_register (0UL);
5765 seq = get_insns ();
5766 end_sequence ();
5768 for (insn = seq; insn; insn = NEXT_INSN (insn))
5769 if (INSN_P (insn))
5770 INSN_LOCATION (insn) = prologue_location;
5772 /* We can be called during expansion of PHI nodes, where
5773 we can't yet emit instructions directly in the final
5774 insn stream. Queue the insns on the entry edge, they will
5775 be committed after everything else is expanded. */
5776 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5783 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5785 if (GET_CODE (orig) == SYMBOL_REF
5786 || GET_CODE (orig) == LABEL_REF)
5788 rtx insn;
5790 if (reg == 0)
5792 gcc_assert (can_create_pseudo_p ());
5793 reg = gen_reg_rtx (Pmode);
5796 /* VxWorks does not impose a fixed gap between segments; the run-time
5797 gap can be different from the object-file gap. We therefore can't
5798 use GOTOFF unless we are absolutely sure that the symbol is in the
5799 same segment as the GOT. Unfortunately, the flexibility of linker
5800 scripts means that we can't be sure of that in general, so assume
5801 that GOTOFF is never valid on VxWorks. */
5802 if ((GET_CODE (orig) == LABEL_REF
5803 || (GET_CODE (orig) == SYMBOL_REF &&
5804 SYMBOL_REF_LOCAL_P (orig)))
5805 && NEED_GOT_RELOC
5806 && !TARGET_VXWORKS_RTP)
5807 insn = arm_pic_static_addr (orig, reg);
5808 else
5810 rtx pat;
5811 rtx mem;
5813 /* If this function doesn't have a pic register, create one now. */
5814 require_pic_register ();
5816 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5818 /* Make the MEM as close to a constant as possible. */
5819 mem = SET_SRC (pat);
5820 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5821 MEM_READONLY_P (mem) = 1;
5822 MEM_NOTRAP_P (mem) = 1;
5824 insn = emit_insn (pat);
5827 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5828      by the loop optimizer.  */
5829 set_unique_reg_note (insn, REG_EQUAL, orig);
5831 return reg;
5833 else if (GET_CODE (orig) == CONST)
5835 rtx base, offset;
5837 if (GET_CODE (XEXP (orig, 0)) == PLUS
5838 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5839 return orig;
5841 /* Handle the case where we have: const (UNSPEC_TLS). */
5842 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5843 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5844 return orig;
5846 /* Handle the case where we have:
5847 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5848 CONST_INT. */
5849 if (GET_CODE (XEXP (orig, 0)) == PLUS
5850 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5851 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5853 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5854 return orig;
5857 if (reg == 0)
5859 gcc_assert (can_create_pseudo_p ());
5860 reg = gen_reg_rtx (Pmode);
5863 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5865 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5866 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5867 base == reg ? 0 : reg);
5869 if (CONST_INT_P (offset))
5871 /* The base register doesn't really matter, we only want to
5872 test the index for the appropriate mode. */
5873 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5875 gcc_assert (can_create_pseudo_p ());
5876 offset = force_reg (Pmode, offset);
5879 if (CONST_INT_P (offset))
5880 return plus_constant (Pmode, base, INTVAL (offset));
5883 if (GET_MODE_SIZE (mode) > 4
5884 && (GET_MODE_CLASS (mode) == MODE_INT
5885 || TARGET_SOFT_FLOAT))
5887 emit_insn (gen_addsi3 (reg, base, offset));
5888 return reg;
5891 return gen_rtx_PLUS (Pmode, base, offset);
5894 return orig;
5898 /* Find a spare register to use during the prolog of a function. */
5900 static int
5901 thumb_find_work_register (unsigned long pushed_regs_mask)
5903 int reg;
5905 /* Check the argument registers first as these are call-used. The
5906 register allocation order means that sometimes r3 might be used
5907 but earlier argument registers might not, so check them all. */
5908 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5909 if (!df_regs_ever_live_p (reg))
5910 return reg;
5912 /* Before going on to check the call-saved registers we can try a couple
5913 more ways of deducing that r3 is available. The first is when we are
5914 pushing anonymous arguments onto the stack and we have less than 4
5915 registers worth of fixed arguments(*). In this case r3 will be part of
5916 the variable argument list and so we can be sure that it will be
5917 pushed right at the start of the function. Hence it will be available
5918 for the rest of the prologue.
5919      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5920 if (cfun->machine->uses_anonymous_args
5921 && crtl->args.pretend_args_size > 0)
5922 return LAST_ARG_REGNUM;
5924 /* The other case is when we have fixed arguments but less than 4 registers
5925 worth. In this case r3 might be used in the body of the function, but
5926 it is not being used to convey an argument into the function. In theory
5927 we could just check crtl->args.size to see how many bytes are
5928 being passed in argument registers, but it seems that it is unreliable.
5929 Sometimes it will have the value 0 when in fact arguments are being
5930 passed. (See testcase execute/20021111-1.c for an example). So we also
5931 check the args_info.nregs field as well. The problem with this field is
5932 that it makes no allowances for arguments that are passed to the
5933 function but which are not used. Hence we could miss an opportunity
5934 when a function has an unused argument in r3. But it is better to be
5935 safe than to be sorry. */
5936 if (! cfun->machine->uses_anonymous_args
5937 && crtl->args.size >= 0
5938 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5939 && (TARGET_AAPCS_BASED
5940 ? crtl->args.info.aapcs_ncrn < 4
5941 : crtl->args.info.nregs < 4))
5942 return LAST_ARG_REGNUM;
5944 /* Otherwise look for a call-saved register that is going to be pushed. */
5945 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5946 if (pushed_regs_mask & (1 << reg))
5947 return reg;
5949 if (TARGET_THUMB2)
5951 /* Thumb-2 can use high regs. */
5952 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5953 if (pushed_regs_mask & (1 << reg))
5954 return reg;
5956 /* Something went wrong - thumb_compute_save_reg_mask()
5957 should have arranged for a suitable register to be pushed. */
5958 gcc_unreachable ();
5961 static GTY(()) int pic_labelno;
5963 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5964 low register. */
5966 void
5967 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5969 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5971 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5972 return;
5974 gcc_assert (flag_pic);
5976 pic_reg = cfun->machine->pic_reg;
5977 if (TARGET_VXWORKS_RTP)
5979 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5980 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5981 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5983 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5985 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5986 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5988 else
5990 /* We use an UNSPEC rather than a LABEL_REF because this label
5991 never appears in the code stream. */
5993 labelno = GEN_INT (pic_labelno++);
5994 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5995 l1 = gen_rtx_CONST (VOIDmode, l1);
5997 /* On the ARM the PC register contains 'dot + 8' at the time of the
5998 addition, on the Thumb it is 'dot + 4'. */
5999 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6000 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6001 UNSPEC_GOTSYM_OFF);
6002 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6004 if (TARGET_32BIT)
6006 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6008 else /* TARGET_THUMB1 */
6010 if (arm_pic_register != INVALID_REGNUM
6011 && REGNO (pic_reg) > LAST_LO_REGNUM)
6013 /* We will have pushed the pic register, so we should always be
6014 able to find a work register. */
6015 pic_tmp = gen_rtx_REG (SImode,
6016 thumb_find_work_register (saved_regs));
6017 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6018 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6019 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6021 else
6022 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6026 /* Need to emit this whether or not we obey regdecls,
6027 since setjmp/longjmp can cause life info to screw up. */
6028 emit_use (pic_reg);
6031 /* Generate code to load the address of a static var when flag_pic is set. */
6032 static rtx
6033 arm_pic_static_addr (rtx orig, rtx reg)
6035 rtx l1, labelno, offset_rtx, insn;
6037 gcc_assert (flag_pic);
6039 /* We use an UNSPEC rather than a LABEL_REF because this label
6040 never appears in the code stream. */
6041 labelno = GEN_INT (pic_labelno++);
6042 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6043 l1 = gen_rtx_CONST (VOIDmode, l1);
6045 /* On the ARM the PC register contains 'dot + 8' at the time of the
6046 addition, on the Thumb it is 'dot + 4'. */
6047 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6048 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6049 UNSPEC_SYMBOL_OFFSET);
6050 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6052 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6053 return insn;
6056 /* Return nonzero if X is valid as an ARM state addressing register. */
6057 static int
6058 arm_address_register_rtx_p (rtx x, int strict_p)
6060 int regno;
6062 if (!REG_P (x))
6063 return 0;
6065 regno = REGNO (x);
6067 if (strict_p)
6068 return ARM_REGNO_OK_FOR_BASE_P (regno);
6070 return (regno <= LAST_ARM_REGNUM
6071 || regno >= FIRST_PSEUDO_REGISTER
6072 || regno == FRAME_POINTER_REGNUM
6073 || regno == ARG_POINTER_REGNUM);
6076 /* Return TRUE if this rtx is the difference of a symbol and a label,
6077 and will reduce to a PC-relative relocation in the object file.
6078 Expressions like this can be left alone when generating PIC, rather
6079 than forced through the GOT. */
6080 static int
6081 pcrel_constant_p (rtx x)
6083 if (GET_CODE (x) == MINUS)
6084 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6086 return FALSE;
6089 /* Return true if X will surely end up in an index register after the next
6090 splitting pass. */
6091 static bool
6092 will_be_in_index_register (const_rtx x)
6094 /* arm.md: calculate_pic_address will split this into a register. */
6095 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6098 /* Return nonzero if X is a valid ARM state address operand. */
6100 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6101 int strict_p)
6103 bool use_ldrd;
6104 enum rtx_code code = GET_CODE (x);
6106 if (arm_address_register_rtx_p (x, strict_p))
6107 return 1;
6109 use_ldrd = (TARGET_LDRD
6110 && (mode == DImode
6111 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6113 if (code == POST_INC || code == PRE_DEC
6114 || ((code == PRE_INC || code == POST_DEC)
6115 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6116 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6118 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6119 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6120 && GET_CODE (XEXP (x, 1)) == PLUS
6121 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6123 rtx addend = XEXP (XEXP (x, 1), 1);
6125 /* Don't allow ldrd post increment by register because it's hard
6126 to fixup invalid register choices. */
6127 if (use_ldrd
6128 && GET_CODE (x) == POST_MODIFY
6129 && REG_P (addend))
6130 return 0;
6132 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6133 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6136 /* After reload constants split into minipools will have addresses
6137 from a LABEL_REF. */
6138 else if (reload_completed
6139 && (code == LABEL_REF
6140 || (code == CONST
6141 && GET_CODE (XEXP (x, 0)) == PLUS
6142 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6143 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6144 return 1;
6146 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6147 return 0;
6149 else if (code == PLUS)
6151 rtx xop0 = XEXP (x, 0);
6152 rtx xop1 = XEXP (x, 1);
6154 return ((arm_address_register_rtx_p (xop0, strict_p)
6155 && ((CONST_INT_P (xop1)
6156 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6157 || (!strict_p && will_be_in_index_register (xop1))))
6158 || (arm_address_register_rtx_p (xop1, strict_p)
6159 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6162 #if 0
6163 /* Reload currently can't handle MINUS, so disable this for now */
6164 else if (GET_CODE (x) == MINUS)
6166 rtx xop0 = XEXP (x, 0);
6167 rtx xop1 = XEXP (x, 1);
6169 return (arm_address_register_rtx_p (xop0, strict_p)
6170 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6172 #endif
6174 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6175 && code == SYMBOL_REF
6176 && CONSTANT_POOL_ADDRESS_P (x)
6177 && ! (flag_pic
6178 && symbol_mentioned_p (get_pool_constant (x))
6179 && ! pcrel_constant_p (get_pool_constant (x))))
6180 return 1;
6182 return 0;
6185 /* Return nonzero if X is a valid Thumb-2 address operand. */
6186 static int
6187 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6189 bool use_ldrd;
6190 enum rtx_code code = GET_CODE (x);
6192 if (arm_address_register_rtx_p (x, strict_p))
6193 return 1;
6195 use_ldrd = (TARGET_LDRD
6196 && (mode == DImode
6197 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6199 if (code == POST_INC || code == PRE_DEC
6200 || ((code == PRE_INC || code == POST_DEC)
6201 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6202 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6204 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6205 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6206 && GET_CODE (XEXP (x, 1)) == PLUS
6207 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6209 /* Thumb-2 only has autoincrement by constant. */
6210 rtx addend = XEXP (XEXP (x, 1), 1);
6211 HOST_WIDE_INT offset;
6213 if (!CONST_INT_P (addend))
6214 return 0;
6216 offset = INTVAL(addend);
6217 if (GET_MODE_SIZE (mode) <= 4)
6218 return (offset > -256 && offset < 256);
6220 return (use_ldrd && offset > -1024 && offset < 1024
6221 && (offset & 3) == 0);
6224 /* After reload constants split into minipools will have addresses
6225 from a LABEL_REF. */
6226 else if (reload_completed
6227 && (code == LABEL_REF
6228 || (code == CONST
6229 && GET_CODE (XEXP (x, 0)) == PLUS
6230 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6231 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6232 return 1;
6234 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6235 return 0;
6237 else if (code == PLUS)
6239 rtx xop0 = XEXP (x, 0);
6240 rtx xop1 = XEXP (x, 1);
6242 return ((arm_address_register_rtx_p (xop0, strict_p)
6243 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6244 || (!strict_p && will_be_in_index_register (xop1))))
6245 || (arm_address_register_rtx_p (xop1, strict_p)
6246 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6249 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6250 && code == SYMBOL_REF
6251 && CONSTANT_POOL_ADDRESS_P (x)
6252 && ! (flag_pic
6253 && symbol_mentioned_p (get_pool_constant (x))
6254 && ! pcrel_constant_p (get_pool_constant (x))))
6255 return 1;
6257 return 0;
6260 /* Return nonzero if INDEX is valid for an address index operand in
6261 ARM state. */
6262 static int
6263 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6264 int strict_p)
6266 HOST_WIDE_INT range;
6267 enum rtx_code code = GET_CODE (index);
6269 /* Standard coprocessor addressing modes. */
6270 if (TARGET_HARD_FLOAT
6271 && TARGET_VFP
6272 && (mode == SFmode || mode == DFmode))
6273 return (code == CONST_INT && INTVAL (index) < 1024
6274 && INTVAL (index) > -1024
6275 && (INTVAL (index) & 3) == 0);
6277 /* For quad modes, we restrict the constant offset to be slightly less
6278 than what the instruction format permits. We do this because for
6279 quad mode moves, we will actually decompose them into two separate
6280 double-mode reads or writes. INDEX must therefore be a valid
6281 (double-mode) offset and so should INDEX+8. */
6282 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6283 return (code == CONST_INT
6284 && INTVAL (index) < 1016
6285 && INTVAL (index) > -1024
6286 && (INTVAL (index) & 3) == 0);
6288 /* We have no such constraint on double mode offsets, so we permit the
6289 full range of the instruction format. */
6290 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6291 return (code == CONST_INT
6292 && INTVAL (index) < 1024
6293 && INTVAL (index) > -1024
6294 && (INTVAL (index) & 3) == 0);
6296 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6297 return (code == CONST_INT
6298 && INTVAL (index) < 1024
6299 && INTVAL (index) > -1024
6300 && (INTVAL (index) & 3) == 0);
6302 if (arm_address_register_rtx_p (index, strict_p)
6303 && (GET_MODE_SIZE (mode) <= 4))
6304 return 1;
6306 if (mode == DImode || mode == DFmode)
6308 if (code == CONST_INT)
6310 HOST_WIDE_INT val = INTVAL (index);
6312 if (TARGET_LDRD)
6313 return val > -256 && val < 256;
6314 else
6315 return val > -4096 && val < 4092;
6318 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6321 if (GET_MODE_SIZE (mode) <= 4
6322 && ! (arm_arch4
6323 && (mode == HImode
6324 || mode == HFmode
6325 || (mode == QImode && outer == SIGN_EXTEND))))
6327 if (code == MULT)
6329 rtx xiop0 = XEXP (index, 0);
6330 rtx xiop1 = XEXP (index, 1);
6332 return ((arm_address_register_rtx_p (xiop0, strict_p)
6333 && power_of_two_operand (xiop1, SImode))
6334 || (arm_address_register_rtx_p (xiop1, strict_p)
6335 && power_of_two_operand (xiop0, SImode)));
6337 else if (code == LSHIFTRT || code == ASHIFTRT
6338 || code == ASHIFT || code == ROTATERT)
6340 rtx op = XEXP (index, 1);
6342 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6343 && CONST_INT_P (op)
6344 && INTVAL (op) > 0
6345 && INTVAL (op) <= 31);
6349 /* For ARM v4 we may be doing a sign-extend operation during the
6350 load. */
6351 if (arm_arch4)
6353 if (mode == HImode
6354 || mode == HFmode
6355 || (outer == SIGN_EXTEND && mode == QImode))
6356 range = 256;
6357 else
6358 range = 4096;
6360 else
6361 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6363 return (code == CONST_INT
6364 && INTVAL (index) < range
6365 && INTVAL (index) > -range);
6368 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6369    index operand, i.e. 1, 2, 4 or 8.  */
6370 static bool
6371 thumb2_index_mul_operand (rtx op)
6373 HOST_WIDE_INT val;
6375 if (!CONST_INT_P (op))
6376 return false;
6378 val = INTVAL(op);
6379 return (val == 1 || val == 2 || val == 4 || val == 8);
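/* These scaling factors correspond to the shift amounts available in the
   Thumb-2 register-offset addressing mode, roughly (illustrative assembly
   only):

     ldr  r0, [r1, r2]          @ scale 1
     ldr  r0, [r1, r2, lsl #2]  @ scale 4
     ldr  r0, [r1, r2, lsl #3]  @ scale 8
*/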
6382 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6383 static int
6384 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6386 enum rtx_code code = GET_CODE (index);
6388 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6389 /* Standard coprocessor addressing modes. */
6390 if (TARGET_HARD_FLOAT
6391 && TARGET_VFP
6392 && (mode == SFmode || mode == DFmode))
6393 return (code == CONST_INT && INTVAL (index) < 1024
6394     /* Thumb-2 allows only a > -256 index range for its core register
6395 load/stores. Since we allow SF/DF in core registers, we have
6396 to use the intersection between -256~4096 (core) and -1024~1024
6397 (coprocessor). */
6398 && INTVAL (index) > -256
6399 && (INTVAL (index) & 3) == 0);
6401 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6403 /* For DImode assume values will usually live in core regs
6404 and only allow LDRD addressing modes. */
6405 if (!TARGET_LDRD || mode != DImode)
6406 return (code == CONST_INT
6407 && INTVAL (index) < 1024
6408 && INTVAL (index) > -1024
6409 && (INTVAL (index) & 3) == 0);
6412 /* For quad modes, we restrict the constant offset to be slightly less
6413 than what the instruction format permits. We do this because for
6414 quad mode moves, we will actually decompose them into two separate
6415 double-mode reads or writes. INDEX must therefore be a valid
6416 (double-mode) offset and so should INDEX+8. */
6417 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6418 return (code == CONST_INT
6419 && INTVAL (index) < 1016
6420 && INTVAL (index) > -1024
6421 && (INTVAL (index) & 3) == 0);
6423 /* We have no such constraint on double mode offsets, so we permit the
6424 full range of the instruction format. */
6425 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6426 return (code == CONST_INT
6427 && INTVAL (index) < 1024
6428 && INTVAL (index) > -1024
6429 && (INTVAL (index) & 3) == 0);
6431 if (arm_address_register_rtx_p (index, strict_p)
6432 && (GET_MODE_SIZE (mode) <= 4))
6433 return 1;
6435 if (mode == DImode || mode == DFmode)
6437 if (code == CONST_INT)
6439 HOST_WIDE_INT val = INTVAL (index);
6440 /* ??? Can we assume ldrd for thumb2? */
6441 /* Thumb-2 ldrd only has reg+const addressing modes. */
6442 /* ldrd supports offsets of +-1020.
6443 However the ldr fallback does not. */
6444 return val > -256 && val < 256 && (val & 3) == 0;
6446 else
6447 return 0;
6450 if (code == MULT)
6452 rtx xiop0 = XEXP (index, 0);
6453 rtx xiop1 = XEXP (index, 1);
6455 return ((arm_address_register_rtx_p (xiop0, strict_p)
6456 && thumb2_index_mul_operand (xiop1))
6457 || (arm_address_register_rtx_p (xiop1, strict_p)
6458 && thumb2_index_mul_operand (xiop0)));
6460 else if (code == ASHIFT)
6462 rtx op = XEXP (index, 1);
6464 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6465 && CONST_INT_P (op)
6466 && INTVAL (op) > 0
6467 && INTVAL (op) <= 3);
6470 return (code == CONST_INT
6471 && INTVAL (index) < 4096
6472 && INTVAL (index) > -256);
6475 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6476 static int
6477 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6479 int regno;
6481 if (!REG_P (x))
6482 return 0;
6484 regno = REGNO (x);
6486 if (strict_p)
6487 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6489 return (regno <= LAST_LO_REGNUM
6490 || regno > LAST_VIRTUAL_REGISTER
6491 || regno == FRAME_POINTER_REGNUM
6492 || (GET_MODE_SIZE (mode) >= 4
6493 && (regno == STACK_POINTER_REGNUM
6494 || regno >= FIRST_PSEUDO_REGISTER
6495 || x == hard_frame_pointer_rtx
6496 || x == arg_pointer_rtx)));
6499 /* Return nonzero if x is a legitimate index register. This is the case
6500 for any base register that can access a QImode object. */
6501 inline static int
6502 thumb1_index_register_rtx_p (rtx x, int strict_p)
6504 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6507 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6509 The AP may be eliminated to either the SP or the FP, so we use the
6510 least common denominator, e.g. SImode, and offsets from 0 to 64.
6512 ??? Verify whether the above is the right approach.
6514 ??? Also, the FP may be eliminated to the SP, so perhaps that
6515 needs special handling also.
6517 ??? Look at how the mips16 port solves this problem. It probably uses
6518 better ways to solve some of these problems.
6520 Although it is not incorrect, we don't accept QImode and HImode
6521 addresses based on the frame pointer or arg pointer until the
6522 reload pass starts. This is so that eliminating such addresses
6523 into stack based ones won't produce impossible code. */
6525 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6527 /* ??? Not clear if this is right. Experiment. */
6528 if (GET_MODE_SIZE (mode) < 4
6529 && !(reload_in_progress || reload_completed)
6530 && (reg_mentioned_p (frame_pointer_rtx, x)
6531 || reg_mentioned_p (arg_pointer_rtx, x)
6532 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6533 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6534 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6535 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6536 return 0;
6538 /* Accept any base register. SP only in SImode or larger. */
6539 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6540 return 1;
6542 /* This is PC relative data before arm_reorg runs. */
6543 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6544 && GET_CODE (x) == SYMBOL_REF
6545 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6546 return 1;
6548 /* This is PC relative data after arm_reorg runs. */
6549 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6550 && reload_completed
6551 && (GET_CODE (x) == LABEL_REF
6552 || (GET_CODE (x) == CONST
6553 && GET_CODE (XEXP (x, 0)) == PLUS
6554 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6555 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6556 return 1;
6558 /* Post-inc indexing only supported for SImode and larger. */
6559 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6560 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6561 return 1;
6563 else if (GET_CODE (x) == PLUS)
6565 /* REG+REG address can be any two index registers. */
6566 /* We disallow FRAME+REG addressing since we know that FRAME
6567 will be replaced with STACK, and SP relative addressing only
6568 permits SP+OFFSET. */
6569 if (GET_MODE_SIZE (mode) <= 4
6570 && XEXP (x, 0) != frame_pointer_rtx
6571 && XEXP (x, 1) != frame_pointer_rtx
6572 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6573 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6574 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6575 return 1;
6577 /* REG+const has 5-7 bit offset for non-SP registers. */
6578 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6579 || XEXP (x, 0) == arg_pointer_rtx)
6580 && CONST_INT_P (XEXP (x, 1))
6581 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6582 return 1;
6584 /* REG+const has 10-bit offset for SP, but only SImode and
6585 larger is supported. */
6586 /* ??? Should probably check for DI/DFmode overflow here
6587 just like GO_IF_LEGITIMATE_OFFSET does. */
6588 else if (REG_P (XEXP (x, 0))
6589 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6590 && GET_MODE_SIZE (mode) >= 4
6591 && CONST_INT_P (XEXP (x, 1))
6592 && INTVAL (XEXP (x, 1)) >= 0
6593 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6594 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6595 return 1;
6597 else if (REG_P (XEXP (x, 0))
6598 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6599 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6600 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6601 && REGNO (XEXP (x, 0))
6602 <= LAST_VIRTUAL_POINTER_REGISTER))
6603 && GET_MODE_SIZE (mode) >= 4
6604 && CONST_INT_P (XEXP (x, 1))
6605 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6606 return 1;
6609 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6610 && GET_MODE_SIZE (mode) == 4
6611 && GET_CODE (x) == SYMBOL_REF
6612 && CONSTANT_POOL_ADDRESS_P (x)
6613 && ! (flag_pic
6614 && symbol_mentioned_p (get_pool_constant (x))
6615 && ! pcrel_constant_p (get_pool_constant (x))))
6616 return 1;
6618 return 0;
6621 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6622 instruction of mode MODE. */
6624 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6626 switch (GET_MODE_SIZE (mode))
6628 case 1:
6629 return val >= 0 && val < 32;
6631 case 2:
6632 return val >= 0 && val < 64 && (val & 1) == 0;
6634 default:
6635 return (val >= 0
6636 && (val + GET_MODE_SIZE (mode)) <= 128
6637 && (val & 3) == 0);
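/* Concretely (assuming the usual Thumb-1 load/store immediate encodings):
   a byte access accepts offsets 0..31, a halfword access 0..62 in steps
   of 2, and a word access 0..124 in steps of 4 (larger modes
   correspondingly less), since the last byte of the access must still
   fall within the 128-byte window checked above.  */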
6641 bool
6642 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6644 if (TARGET_ARM)
6645 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6646 else if (TARGET_THUMB2)
6647 return thumb2_legitimate_address_p (mode, x, strict_p);
6648 else /* if (TARGET_THUMB1) */
6649 return thumb1_legitimate_address_p (mode, x, strict_p);
6652 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6654 Given an rtx X being reloaded into a reg required to be
6655 in class CLASS, return the class of reg to actually use.
6656 In general this is just CLASS, but for the Thumb core registers and
6657 immediate constants we prefer a LO_REGS class or a subset. */
6659 static reg_class_t
6660 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6662 if (TARGET_32BIT)
6663 return rclass;
6664 else
6666 if (rclass == GENERAL_REGS
6667 || rclass == HI_REGS
6668 || rclass == NO_REGS
6669 || rclass == STACK_REG)
6670 return LO_REGS;
6671 else
6672 return rclass;
6676 /* Build the SYMBOL_REF for __tls_get_addr. */
6678 static GTY(()) rtx tls_get_addr_libfunc;
6680 static rtx
6681 get_tls_get_addr (void)
6683 if (!tls_get_addr_libfunc)
6684 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6685 return tls_get_addr_libfunc;
6689 arm_load_tp (rtx target)
6691 if (!target)
6692 target = gen_reg_rtx (SImode);
6694 if (TARGET_HARD_TP)
6696 /* Can return in any reg. */
6697 emit_insn (gen_load_tp_hard (target));
6699 else
6701       /* Always returned in r0.  Immediately copy the result into a pseudo;
6702 otherwise other uses of r0 (e.g. setting up function arguments) may
6703 clobber the value. */
6705 rtx tmp;
6707 emit_insn (gen_load_tp_soft ());
6709 tmp = gen_rtx_REG (SImode, 0);
6710 emit_move_insn (target, tmp);
6712 return target;
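/* In the soft-TP case the expansion is assumed to boil down to the EABI
   helper call (a sketch of the expected sequence; the exact code depends
   on the target):

     bl   __aeabi_read_tp     @ thread pointer returned in r0
     mov  rN, r0              @ copy into the pseudo, as done above
*/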
6715 static rtx
6716 load_tls_operand (rtx x, rtx reg)
6718 rtx tmp;
6720 if (reg == NULL_RTX)
6721 reg = gen_reg_rtx (SImode);
6723 tmp = gen_rtx_CONST (SImode, x);
6725 emit_move_insn (reg, tmp);
6727 return reg;
6730 static rtx
6731 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6733 rtx insns, label, labelno, sum;
6735 gcc_assert (reloc != TLS_DESCSEQ);
6736 start_sequence ();
6738 labelno = GEN_INT (pic_labelno++);
6739 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6740 label = gen_rtx_CONST (VOIDmode, label);
6742 sum = gen_rtx_UNSPEC (Pmode,
6743 gen_rtvec (4, x, GEN_INT (reloc), label,
6744 GEN_INT (TARGET_ARM ? 8 : 4)),
6745 UNSPEC_TLS);
6746 reg = load_tls_operand (sum, reg);
6748 if (TARGET_ARM)
6749 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6750 else
6751 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6753 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6754 LCT_PURE, /* LCT_CONST? */
6755 Pmode, 1, reg, Pmode);
6757 insns = get_insns ();
6758 end_sequence ();
6760 return insns;
6763 static rtx
6764 arm_tls_descseq_addr (rtx x, rtx reg)
6766 rtx labelno = GEN_INT (pic_labelno++);
6767 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6768 rtx sum = gen_rtx_UNSPEC (Pmode,
6769 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6770 gen_rtx_CONST (VOIDmode, label),
6771 GEN_INT (!TARGET_ARM)),
6772 UNSPEC_TLS);
6773 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6775 emit_insn (gen_tlscall (x, labelno));
6776 if (!reg)
6777 reg = gen_reg_rtx (SImode);
6778 else
6779 gcc_assert (REGNO (reg) != 0);
6781 emit_move_insn (reg, reg0);
6783 return reg;
6787 legitimize_tls_address (rtx x, rtx reg)
6789 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6790 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6792 switch (model)
6794 case TLS_MODEL_GLOBAL_DYNAMIC:
6795 if (TARGET_GNU2_TLS)
6797 reg = arm_tls_descseq_addr (x, reg);
6799 tp = arm_load_tp (NULL_RTX);
6801 dest = gen_rtx_PLUS (Pmode, tp, reg);
6803 else
6805 /* Original scheme */
6806 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6807 dest = gen_reg_rtx (Pmode);
6808 emit_libcall_block (insns, dest, ret, x);
6810 return dest;
6812 case TLS_MODEL_LOCAL_DYNAMIC:
6813 if (TARGET_GNU2_TLS)
6815 reg = arm_tls_descseq_addr (x, reg);
6817 tp = arm_load_tp (NULL_RTX);
6819 dest = gen_rtx_PLUS (Pmode, tp, reg);
6821 else
6823 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6825 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6826 share the LDM result with other LD model accesses. */
6827 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6828 UNSPEC_TLS);
6829 dest = gen_reg_rtx (Pmode);
6830 emit_libcall_block (insns, dest, ret, eqv);
6832 /* Load the addend. */
6833 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6834 GEN_INT (TLS_LDO32)),
6835 UNSPEC_TLS);
6836 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6837 dest = gen_rtx_PLUS (Pmode, dest, addend);
6839 return dest;
6841 case TLS_MODEL_INITIAL_EXEC:
6842 labelno = GEN_INT (pic_labelno++);
6843 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6844 label = gen_rtx_CONST (VOIDmode, label);
6845 sum = gen_rtx_UNSPEC (Pmode,
6846 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6847 GEN_INT (TARGET_ARM ? 8 : 4)),
6848 UNSPEC_TLS);
6849 reg = load_tls_operand (sum, reg);
6851 if (TARGET_ARM)
6852 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6853 else if (TARGET_THUMB2)
6854 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6855 else
6857 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6858 emit_move_insn (reg, gen_const_mem (SImode, reg));
6861 tp = arm_load_tp (NULL_RTX);
6863 return gen_rtx_PLUS (Pmode, tp, reg);
6865 case TLS_MODEL_LOCAL_EXEC:
6866 tp = arm_load_tp (NULL_RTX);
6868 reg = gen_rtx_UNSPEC (Pmode,
6869 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6870 UNSPEC_TLS);
6871 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6873 return gen_rtx_PLUS (Pmode, tp, reg);
6875 default:
6876 abort ();
6880 /* Try machine-dependent ways of modifying an illegitimate address
6881 to be legitimate. If we find one, return the new, valid address. */
6883 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6885 if (!TARGET_ARM)
6887 /* TODO: legitimize_address for Thumb2. */
6888 if (TARGET_THUMB2)
6889 return x;
6890 return thumb_legitimize_address (x, orig_x, mode);
6893 if (arm_tls_symbol_p (x))
6894 return legitimize_tls_address (x, NULL_RTX);
6896 if (GET_CODE (x) == PLUS)
6898 rtx xop0 = XEXP (x, 0);
6899 rtx xop1 = XEXP (x, 1);
6901 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6902 xop0 = force_reg (SImode, xop0);
6904 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6905 xop1 = force_reg (SImode, xop1);
6907 if (ARM_BASE_REGISTER_RTX_P (xop0)
6908 && CONST_INT_P (xop1))
6910 HOST_WIDE_INT n, low_n;
6911 rtx base_reg, val;
6912 n = INTVAL (xop1);
6914 /* VFP addressing modes actually allow greater offsets, but for
6915 now we just stick with the lowest common denominator. */
6916 if (mode == DImode
6917 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6919 low_n = n & 0x0f;
6920 n &= ~0x0f;
6921 if (low_n > 4)
6923 n += 16;
6924 low_n -= 16;
6927 else
6929 low_n = ((mode) == TImode ? 0
6930 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6931 n -= low_n;
6934 base_reg = gen_reg_rtx (SImode);
6935 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6936 emit_move_insn (base_reg, val);
6937 x = plus_constant (Pmode, base_reg, low_n);
6939 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6940 x = gen_rtx_PLUS (SImode, xop0, xop1);
6943 /* XXX We don't allow MINUS any more -- see comment in
6944 arm_legitimate_address_outer_p (). */
6945 else if (GET_CODE (x) == MINUS)
6947 rtx xop0 = XEXP (x, 0);
6948 rtx xop1 = XEXP (x, 1);
6950 if (CONSTANT_P (xop0))
6951 xop0 = force_reg (SImode, xop0);
6953 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6954 xop1 = force_reg (SImode, xop1);
6956 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6957 x = gen_rtx_MINUS (SImode, xop0, xop1);
6960 /* Make sure to take full advantage of the pre-indexed addressing mode
6961 with absolute addresses which often allows for the base register to
6962 be factorized for multiple adjacent memory references, and it might
6963      even allow for the minipool to be avoided entirely.  */
6964 else if (CONST_INT_P (x) && optimize > 0)
6966 unsigned int bits;
6967 HOST_WIDE_INT mask, base, index;
6968 rtx base_reg;
6970       /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6971          use an 8-bit index.  So let's use a 12-bit index for SImode only and
6972 hope that arm_gen_constant will enable ldrb to use more bits. */
6973 bits = (mode == SImode) ? 12 : 8;
6974 mask = (1 << bits) - 1;
6975 base = INTVAL (x) & ~mask;
6976 index = INTVAL (x) & mask;
6977 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6979 /* It'll most probably be more efficient to generate the base
6980 with more bits set and use a negative index instead. */
6981 base |= mask;
6982 index -= mask;
6984 base_reg = force_reg (SImode, GEN_INT (base));
6985 x = plus_constant (Pmode, base_reg, index);
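/* For instance (values picked purely for illustration), an SImode access
   to the absolute address 0x00012345 would be split here into
   base = 0x00012000 (forced into base_reg) and index = 0x345, so that
   nearby absolute addresses can share the same base register; the
   bit_count test above only flips to a negative index when the base
   would otherwise need too many set bits to construct.  */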
6988 if (flag_pic)
6990 /* We need to find and carefully transform any SYMBOL and LABEL
6991 references; so go back to the original address expression. */
6992 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6994 if (new_x != orig_x)
6995 x = new_x;
6998 return x;
7002 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7003 to be legitimate. If we find one, return the new, valid address. */
7005 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7007 if (arm_tls_symbol_p (x))
7008 return legitimize_tls_address (x, NULL_RTX);
7010 if (GET_CODE (x) == PLUS
7011 && CONST_INT_P (XEXP (x, 1))
7012 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7013 || INTVAL (XEXP (x, 1)) < 0))
7015 rtx xop0 = XEXP (x, 0);
7016 rtx xop1 = XEXP (x, 1);
7017 HOST_WIDE_INT offset = INTVAL (xop1);
7019 /* Try and fold the offset into a biasing of the base register and
7020 then offsetting that. Don't do this when optimizing for space
7021 since it can cause too many CSEs. */
7022 if (optimize_size && offset >= 0
7023 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7025 HOST_WIDE_INT delta;
7027 if (offset >= 256)
7028 delta = offset - (256 - GET_MODE_SIZE (mode));
7029 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7030 delta = 31 * GET_MODE_SIZE (mode);
7031 else
7032 delta = offset & (~31 * GET_MODE_SIZE (mode));
7034 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7035 NULL_RTX);
7036 x = plus_constant (Pmode, xop0, delta);
7038 else if (offset < 0 && offset > -256)
7039 /* Small negative offsets are best done with a subtract before the
7040        dereference; forcing these into a register normally takes two
7041 instructions. */
7042 x = force_operand (x, NULL_RTX);
7043 else
7045 /* For the remaining cases, force the constant into a register. */
7046 xop1 = force_reg (SImode, xop1);
7047 x = gen_rtx_PLUS (SImode, xop0, xop1);
7050 else if (GET_CODE (x) == PLUS
7051 && s_register_operand (XEXP (x, 1), SImode)
7052 && !s_register_operand (XEXP (x, 0), SImode))
7054 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7056 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7059 if (flag_pic)
7061 /* We need to find and carefully transform any SYMBOL and LABEL
7062 references; so go back to the original address expression. */
7063 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7065 if (new_x != orig_x)
7066 x = new_x;
7069 return x;
7072 bool
7073 arm_legitimize_reload_address (rtx *p,
7074 enum machine_mode mode,
7075 int opnum, int type,
7076 int ind_levels ATTRIBUTE_UNUSED)
7078 /* We must recognize output that we have already generated ourselves. */
7079 if (GET_CODE (*p) == PLUS
7080 && GET_CODE (XEXP (*p, 0)) == PLUS
7081 && REG_P (XEXP (XEXP (*p, 0), 0))
7082 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7083 && CONST_INT_P (XEXP (*p, 1)))
7085 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7086 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7087 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7088 return true;
7091 if (GET_CODE (*p) == PLUS
7092 && REG_P (XEXP (*p, 0))
7093 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7094 /* If the base register is equivalent to a constant, let the generic
7095 code handle it. Otherwise we will run into problems if a future
7096 reload pass decides to rematerialize the constant. */
7097 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7098 && CONST_INT_P (XEXP (*p, 1)))
7100 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7101 HOST_WIDE_INT low, high;
7103 /* Detect coprocessor load/stores. */
7104 bool coproc_p = ((TARGET_HARD_FLOAT
7105 && TARGET_VFP
7106 && (mode == SFmode || mode == DFmode))
7107 || (TARGET_REALLY_IWMMXT
7108 && VALID_IWMMXT_REG_MODE (mode))
7109 || (TARGET_NEON
7110 && (VALID_NEON_DREG_MODE (mode)
7111 || VALID_NEON_QREG_MODE (mode))));
7113 /* For some conditions, bail out when lower two bits are unaligned. */
7114 if ((val & 0x3) != 0
7115 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7116 && (coproc_p
7117 /* For DI, and DF under soft-float: */
7118 || ((mode == DImode || mode == DFmode)
7119 /* Without ldrd, we use stm/ldm, which does not
7120                fare well with unaligned offsets.  */
7121 && (! TARGET_LDRD
7122 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7123 || TARGET_THUMB2))))
7124 return false;
7126 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7127          where the (reg+high) part gets turned into a reload add insn,
7128 we try to decompose the index into high/low values that can often
7129 also lead to better reload CSE.
7130 For example:
7131 ldr r0, [r2, #4100] // Offset too large
7132 ldr r1, [r2, #4104] // Offset too large
7134 is best reloaded as:
7135 add t1, r2, #4096
7136 ldr r0, [t1, #4]
7137 add t2, r2, #4096
7138 ldr r1, [t2, #8]
7140 which post-reload CSE can simplify in most cases to eliminate the
7141 second add instruction:
7142 add t1, r2, #4096
7143 ldr r0, [t1, #4]
7144 ldr r1, [t1, #8]
7146 The idea here is that we want to split out the bits of the constant
7147 as a mask, rather than as subtracting the maximum offset that the
7148 respective type of load/store used can handle.
7150      A negative low part can still be utilized even if the overall offset
7151      is positive; sometimes this may lead to an immediate
7152 that can be constructed with fewer instructions.
7153 For example:
7154 ldr r0, [r2, #0x3FFFFC]
7156 This is best reloaded as:
7157 add t1, r2, #0x400000
7158 ldr r0, [t1, #-4]
7160 The trick for spotting this for a load insn with N bits of offset
7161      (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7162 negative offset that is going to make bit N and all the bits below
7163 it become zero in the remainder part.
7165 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7166 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7167 used in most cases of ARM load/store instructions. */
7169 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7170 (((VAL) & ((1 << (N)) - 1)) \
7171 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7172 : 0)
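/* Checking the 0x3FFFFC example above against the macro, with the 12-bit
   SImode index width (N = 12):

     low 12 bits of VAL     = 0xFFC    (nonzero, so take the first arm)
     low 13 bits of VAL     = 0x1FFC
     XOR with (1 << 12)     = 0x0FFC
     minus (1 << 12)        = -4       =>  low  = -4
     high = VAL - low       = 0x400000

   which reproduces the "add t1, r2, #0x400000; ldr r0, [t1, #-4]"
   reload shown earlier.  */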
7174 if (coproc_p)
7176 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7178 /* NEON quad-word load/stores are made of two double-word accesses,
7179 so the valid index range is reduced by 8. Treat as 9-bit range if
7180 we go over it. */
7181 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7182 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7184 else if (GET_MODE_SIZE (mode) == 8)
7186 if (TARGET_LDRD)
7187 low = (TARGET_THUMB2
7188 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7189 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7190 else
7191 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7192 to access doublewords. The supported load/store offsets are
7193 -8, -4, and 4, which we try to produce here. */
7194 low = ((val & 0xf) ^ 0x8) - 0x8;
7196 else if (GET_MODE_SIZE (mode) < 8)
7198 /* NEON element load/stores do not have an offset. */
7199 if (TARGET_NEON_FP16 && mode == HFmode)
7200 return false;
7202 if (TARGET_THUMB2)
7204 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7205 Try the wider 12-bit range first, and re-try if the result
7206 is out of range. */
7207 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7208 if (low < -255)
7209 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7211 else
7213 if (mode == HImode || mode == HFmode)
7215 if (arm_arch4)
7216 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7217 else
7219 /* The storehi/movhi_bytes fallbacks can use only
7220 [-4094,+4094] of the full ldrb/strb index range. */
7221 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7222 if (low == 4095 || low == -4095)
7223 return false;
7226 else
7227 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7230 else
7231 return false;
7233 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7234 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7235 - (unsigned HOST_WIDE_INT) 0x80000000);
7236 /* Check for overflow or zero */
7237 if (low == 0 || high == 0 || (high + low != val))
7238 return false;
7240 /* Reload the high part into a base reg; leave the low part
7241 in the mem.
7242 Note that replacing this gen_rtx_PLUS with plus_constant is
7243 wrong in this case because we rely on the
7244 (plus (plus reg c1) c2) structure being preserved so that
7245 XEXP (*p, 0) in push_reload below uses the correct term. */
7246 *p = gen_rtx_PLUS (GET_MODE (*p),
7247 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7248 GEN_INT (high)),
7249 GEN_INT (low));
7250 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7251 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7252 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7253 return true;
7256 return false;
7260 thumb_legitimize_reload_address (rtx *x_p,
7261 enum machine_mode mode,
7262 int opnum, int type,
7263 int ind_levels ATTRIBUTE_UNUSED)
7265 rtx x = *x_p;
7267 if (GET_CODE (x) == PLUS
7268 && GET_MODE_SIZE (mode) < 4
7269 && REG_P (XEXP (x, 0))
7270 && XEXP (x, 0) == stack_pointer_rtx
7271 && CONST_INT_P (XEXP (x, 1))
7272 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7274 rtx orig_x = x;
7276 x = copy_rtx (x);
7277 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7278 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7279 return x;
7282 /* If both registers are hi-regs, then it's better to reload the
7283 entire expression rather than each register individually. That
7284 only requires one reload register rather than two. */
7285 if (GET_CODE (x) == PLUS
7286 && REG_P (XEXP (x, 0))
7287 && REG_P (XEXP (x, 1))
7288 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7289 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7291 rtx orig_x = x;
7293 x = copy_rtx (x);
7294 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7295 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7296 return x;
7299 return NULL;
7302 /* Test for various thread-local symbols. */
7304 /* Return TRUE if X is a thread-local symbol. */
7306 static bool
7307 arm_tls_symbol_p (rtx x)
7309 if (! TARGET_HAVE_TLS)
7310 return false;
7312 if (GET_CODE (x) != SYMBOL_REF)
7313 return false;
7315 return SYMBOL_REF_TLS_MODEL (x) != 0;
7318 /* Helper for arm_tls_referenced_p. */
7320 static int
7321 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7323 if (GET_CODE (*x) == SYMBOL_REF)
7324 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7326 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7327 TLS offsets, not real symbol references. */
7328 if (GET_CODE (*x) == UNSPEC
7329 && XINT (*x, 1) == UNSPEC_TLS)
7330 return -1;
7332 return 0;
7335 /* Return TRUE if X contains any TLS symbol references. */
7337 bool
7338 arm_tls_referenced_p (rtx x)
7340 if (! TARGET_HAVE_TLS)
7341 return false;
7343 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7346 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7348 On the ARM, allow any integer (invalid ones are removed later by insn
7349 patterns), nice doubles and symbol_refs which refer to the function's
7350 constant pool XXX.
7352 When generating pic allow anything. */
7354 static bool
7355 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7357 /* At present, we have no support for Neon structure constants, so forbid
7358 them here. It might be possible to handle simple cases like 0 and -1
7359 in future. */
7360 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7361 return false;
7363 return flag_pic || !label_mentioned_p (x);
7366 static bool
7367 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7369 return (CONST_INT_P (x)
7370 || CONST_DOUBLE_P (x)
7371 || CONSTANT_ADDRESS_P (x)
7372 || flag_pic);
7375 static bool
7376 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7378 return (!arm_cannot_force_const_mem (mode, x)
7379 && (TARGET_32BIT
7380 ? arm_legitimate_constant_p_1 (mode, x)
7381 : thumb_legitimate_constant_p (mode, x)));
7384 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7386 static bool
7387 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7389 rtx base, offset;
7391 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7393 split_const (x, &base, &offset);
7394 if (GET_CODE (base) == SYMBOL_REF
7395 && !offset_within_block_p (base, INTVAL (offset)))
7396 return true;
7398 return arm_tls_referenced_p (x);
7401 #define REG_OR_SUBREG_REG(X) \
7402 (REG_P (X) \
7403 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7405 #define REG_OR_SUBREG_RTX(X) \
7406 (REG_P (X) ? (X) : SUBREG_REG (X))
7408 static inline int
7409 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7411 enum machine_mode mode = GET_MODE (x);
7412 int total, words;
7414 switch (code)
7416 case ASHIFT:
7417 case ASHIFTRT:
7418 case LSHIFTRT:
7419 case ROTATERT:
7420 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7422 case PLUS:
7423 case MINUS:
7424 case COMPARE:
7425 case NEG:
7426 case NOT:
7427 return COSTS_N_INSNS (1);
7429 case MULT:
7430 if (CONST_INT_P (XEXP (x, 1)))
7432 int cycles = 0;
7433 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
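/* Each iteration of the loop below accounts for two bits of the
   multiplier, so the cost grows with the constant's width; e.g. a
   multiplier of 0x100 takes five iterations, giving
   COSTS_N_INSNS (2) + 5 (a rough, illustrative figure only). */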
7435 while (i)
7437 i >>= 2;
7438 cycles++;
7440 return COSTS_N_INSNS (2) + cycles;
7442 return COSTS_N_INSNS (1) + 16;
7444 case SET:
7445 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7446 the mode. */
7447 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7448 return (COSTS_N_INSNS (words)
7449 + 4 * ((MEM_P (SET_SRC (x)))
7450 + MEM_P (SET_DEST (x))));
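/* For example, storing a single SImode register to memory gives
   words == 1 and one MEM operand, i.e. COSTS_N_INSNS (1) + 4
   (an illustrative reading of the formula above). */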
7452 case CONST_INT:
7453 if (outer == SET)
7455 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7456 return 0;
7457 if (thumb_shiftable_const (INTVAL (x)))
7458 return COSTS_N_INSNS (2);
7459 return COSTS_N_INSNS (3);
7461 else if ((outer == PLUS || outer == COMPARE)
7462 && INTVAL (x) < 256 && INTVAL (x) > -256)
7463 return 0;
7464 else if ((outer == IOR || outer == XOR || outer == AND)
7465 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7466 return COSTS_N_INSNS (1);
7467 else if (outer == AND)
7469 int i;
7470 /* This duplicates the tests in the andsi3 expander. */
7471 for (i = 9; i <= 31; i++)
7472 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7473 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7474 return COSTS_N_INSNS (2);
7476 else if (outer == ASHIFT || outer == ASHIFTRT
7477 || outer == LSHIFTRT)
7478 return 0;
7479 return COSTS_N_INSNS (2);
7481 case CONST:
7482 case CONST_DOUBLE:
7483 case LABEL_REF:
7484 case SYMBOL_REF:
7485 return COSTS_N_INSNS (3);
7487 case UDIV:
7488 case UMOD:
7489 case DIV:
7490 case MOD:
7491 return 100;
7493 case TRUNCATE:
7494 return 99;
7496 case AND:
7497 case XOR:
7498 case IOR:
7499 /* XXX guess. */
7500 return 8;
7502 case MEM:
7503 /* XXX another guess. */
7504 /* Memory costs quite a lot for the first word, but subsequent words
7505 load at the equivalent of a single insn each. */
7506 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7507 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7508 ? 4 : 0));
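/* As a worked example (UNITS_PER_WORD is 4 on ARM), a DImode access is
   costed as 10 + 4 * 1 = 14, plus a further 4 if the address is a
   constant-pool reference. */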
7510 case IF_THEN_ELSE:
7511 /* XXX a guess. */
7512 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7513 return 14;
7514 return 2;
7516 case SIGN_EXTEND:
7517 case ZERO_EXTEND:
7518 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7519 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7521 if (mode == SImode)
7522 return total;
7524 if (arm_arch6)
7525 return total + COSTS_N_INSNS (1);
7527 /* Assume a two-shift sequence. Increase the cost slightly so
7528 we prefer actual shifts over an extend operation. */
7529 return total + 1 + COSTS_N_INSNS (2);
7531 default:
7532 return 99;
7536 static inline bool
7537 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7539 enum machine_mode mode = GET_MODE (x);
7540 enum rtx_code subcode;
7541 rtx operand;
7542 enum rtx_code code = GET_CODE (x);
7543 *total = 0;
7545 switch (code)
7547 case MEM:
7548 /* Memory costs quite a lot for the first word, but subsequent words
7549 load at the equivalent of a single insn each. */
7550 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7551 return true;
7553 case DIV:
7554 case MOD:
7555 case UDIV:
7556 case UMOD:
7557 if (TARGET_HARD_FLOAT && mode == SFmode)
7558 *total = COSTS_N_INSNS (2);
7559 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7560 *total = COSTS_N_INSNS (4);
7561 else
7562 *total = COSTS_N_INSNS (20);
7563 return false;
7565 case ROTATE:
7566 if (REG_P (XEXP (x, 1)))
7567 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7568 else if (!CONST_INT_P (XEXP (x, 1)))
7569 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7571 /* Fall through */
7572 case ROTATERT:
7573 if (mode != SImode)
7575 *total += COSTS_N_INSNS (4);
7576 return true;
7579 /* Fall through */
7580 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7581 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7582 if (mode == DImode)
7584 *total += COSTS_N_INSNS (3);
7585 return true;
7588 *total += COSTS_N_INSNS (1);
7589 /* Increase the cost of complex shifts because they aren't any faster
7590 and they reduce dual-issue opportunities. */
7591 if (arm_tune_cortex_a9
7592 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7593 ++*total;
7595 return true;
7597 case MINUS:
7598 if (mode == DImode)
7600 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7601 if (CONST_INT_P (XEXP (x, 0))
7602 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7604 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7605 return true;
7608 if (CONST_INT_P (XEXP (x, 1))
7609 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7611 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7612 return true;
7615 return false;
7618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7620 if (TARGET_HARD_FLOAT
7621 && (mode == SFmode
7622 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7624 *total = COSTS_N_INSNS (1);
7625 if (CONST_DOUBLE_P (XEXP (x, 0))
7626 && arm_const_double_rtx (XEXP (x, 0)))
7628 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7629 return true;
7632 if (CONST_DOUBLE_P (XEXP (x, 1))
7633 && arm_const_double_rtx (XEXP (x, 1)))
7635 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7636 return true;
7639 return false;
7641 *total = COSTS_N_INSNS (20);
7642 return false;
7645 *total = COSTS_N_INSNS (1);
7646 if (CONST_INT_P (XEXP (x, 0))
7647 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7649 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7650 return true;
7653 subcode = GET_CODE (XEXP (x, 1));
7654 if (subcode == ASHIFT || subcode == ASHIFTRT
7655 || subcode == LSHIFTRT
7656 || subcode == ROTATE || subcode == ROTATERT)
7658 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7659 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7660 return true;
7663 /* A shift as a part of RSB costs no more than RSB itself. */
7664 if (GET_CODE (XEXP (x, 0)) == MULT
7665 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7667 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7668 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7669 return true;
7672 if (subcode == MULT
7673 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7675 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7676 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7677 return true;
7680 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7681 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7683 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7684 if (REG_P (XEXP (XEXP (x, 1), 0))
7685 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7686 *total += COSTS_N_INSNS (1);
7688 return true;
7691 /* Fall through */
7693 case PLUS:
7694 if (code == PLUS && arm_arch6 && mode == SImode
7695 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7696 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7698 *total = COSTS_N_INSNS (1);
7699 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7700 0, speed);
7701 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7702 return true;
7705 /* MLA: All arguments must be registers. We filter out
7706 multiplication by a power of two, so that we fall through to
7707 the code below. */
7708 if (GET_CODE (XEXP (x, 0)) == MULT
7709 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7711 /* The cost comes from the cost of the multiply. */
7712 return false;
7715 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7717 if (TARGET_HARD_FLOAT
7718 && (mode == SFmode
7719 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7721 *total = COSTS_N_INSNS (1);
7722 if (CONST_DOUBLE_P (XEXP (x, 1))
7723 && arm_const_double_rtx (XEXP (x, 1)))
7725 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7726 return true;
7729 return false;
7732 *total = COSTS_N_INSNS (20);
7733 return false;
7736 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7737 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7739 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7740 if (REG_P (XEXP (XEXP (x, 0), 0))
7741 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7742 *total += COSTS_N_INSNS (1);
7743 return true;
7746 /* Fall through */
7748 case AND: case XOR: case IOR:
7750 /* Normally the frame registers will be split into reg+const during
7751 reload, so it is a bad idea to combine them with other instructions,
7752 since then they might not be moved outside of loops. As a compromise
7753 we allow integration with ops that have a constant as their second
7754 operand. */
7755 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7756 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7757 && !CONST_INT_P (XEXP (x, 1)))
7758 *total = COSTS_N_INSNS (1);
7760 if (mode == DImode)
7762 *total += COSTS_N_INSNS (2);
7763 if (CONST_INT_P (XEXP (x, 1))
7764 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7766 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7767 return true;
7770 return false;
7773 *total += COSTS_N_INSNS (1);
7774 if (CONST_INT_P (XEXP (x, 1))
7775 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7777 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7778 return true;
7780 subcode = GET_CODE (XEXP (x, 0));
7781 if (subcode == ASHIFT || subcode == ASHIFTRT
7782 || subcode == LSHIFTRT
7783 || subcode == ROTATE || subcode == ROTATERT)
7785 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7786 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7787 return true;
7790 if (subcode == MULT
7791 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7793 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7794 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7795 return true;
7798 if (subcode == UMIN || subcode == UMAX
7799 || subcode == SMIN || subcode == SMAX)
7801 *total = COSTS_N_INSNS (3);
7802 return true;
7805 return false;
7807 case MULT:
7808 /* This should have been handled by the CPU specific routines. */
7809 gcc_unreachable ();
7811 case TRUNCATE:
7812 if (arm_arch3m && mode == SImode
7813 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7814 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7815 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7816 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7817 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7818 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7820 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7821 return true;
7823 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7824 return false;
7826 case NEG:
7827 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7829 if (TARGET_HARD_FLOAT
7830 && (mode == SFmode
7831 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7833 *total = COSTS_N_INSNS (1);
7834 return false;
7836 *total = COSTS_N_INSNS (2);
7837 return false;
7840 /* Fall through */
7841 case NOT:
7842 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7843 if (mode == SImode && code == NOT)
7845 subcode = GET_CODE (XEXP (x, 0));
7846 if (subcode == ASHIFT || subcode == ASHIFTRT
7847 || subcode == LSHIFTRT
7848 || subcode == ROTATE || subcode == ROTATERT
7849 || (subcode == MULT
7850 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7852 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7853 /* Register shifts cost an extra cycle. */
7854 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7855 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7856 subcode, 1, speed);
7857 return true;
7861 return false;
7863 case IF_THEN_ELSE:
7864 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7866 *total = COSTS_N_INSNS (4);
7867 return true;
7870 operand = XEXP (x, 0);
7872 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7873 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7874 && REG_P (XEXP (operand, 0))
7875 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7876 *total += COSTS_N_INSNS (1);
7877 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7878 + rtx_cost (XEXP (x, 2), code, 2, speed));
7879 return true;
7881 case NE:
7882 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7884 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7885 return true;
7887 goto scc_insn;
7889 case GE:
7890 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7891 && mode == SImode && XEXP (x, 1) == const0_rtx)
7893 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7894 return true;
7896 goto scc_insn;
7898 case LT:
7899 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7900 && mode == SImode && XEXP (x, 1) == const0_rtx)
7902 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7903 return true;
7905 goto scc_insn;
7907 case EQ:
7908 case GT:
7909 case LE:
7910 case GEU:
7911 case LTU:
7912 case GTU:
7913 case LEU:
7914 case UNORDERED:
7915 case ORDERED:
7916 case UNEQ:
7917 case UNGE:
7918 case UNLT:
7919 case UNGT:
7920 case UNLE:
7921 scc_insn:
7922 /* SCC insns. When the comparison has already been performed,
7923 they cost 2 instructions. Otherwise they need an additional
7924 comparison before them. */
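/* Illustratively, think of a MOV Rd, #0 followed by a conditional
   MOV Rd, #1 once the flags are set; hence the base cost of two
   instructions below. */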
7925 *total = COSTS_N_INSNS (2);
7926 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7928 return true;
7931 /* Fall through */
7932 case COMPARE:
7933 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7935 *total = 0;
7936 return true;
7939 *total += COSTS_N_INSNS (1);
7940 if (CONST_INT_P (XEXP (x, 1))
7941 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7943 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7944 return true;
7947 subcode = GET_CODE (XEXP (x, 0));
7948 if (subcode == ASHIFT || subcode == ASHIFTRT
7949 || subcode == LSHIFTRT
7950 || subcode == ROTATE || subcode == ROTATERT)
7952 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7953 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7954 return true;
7957 if (subcode == MULT
7958 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7960 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7961 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7962 return true;
7965 return false;
7967 case UMIN:
7968 case UMAX:
7969 case SMIN:
7970 case SMAX:
7971 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7972 if (!CONST_INT_P (XEXP (x, 1))
7973 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7974 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7975 return true;
7977 case ABS:
7978 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7980 if (TARGET_HARD_FLOAT
7981 && (mode == SFmode
7982 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7984 *total = COSTS_N_INSNS (1);
7985 return false;
7987 *total = COSTS_N_INSNS (20);
7988 return false;
7990 *total = COSTS_N_INSNS (1);
7991 if (mode == DImode)
7992 *total += COSTS_N_INSNS (3);
7993 return false;
7995 case SIGN_EXTEND:
7996 case ZERO_EXTEND:
7997 *total = 0;
7998 if (GET_MODE_CLASS (mode) == MODE_INT)
8000 rtx op = XEXP (x, 0);
8001 enum machine_mode opmode = GET_MODE (op);
8003 if (mode == DImode)
8004 *total += COSTS_N_INSNS (1);
8006 if (opmode != SImode)
8008 if (MEM_P (op))
8010 /* If !arm_arch4, we use one of the extendhisi2_mem
8011 or movhi_bytes patterns for HImode. For a QImode
8012 sign extension, we first zero-extend from memory
8013 and then perform a shift sequence. */
8014 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8015 *total += COSTS_N_INSNS (2);
8017 else if (arm_arch6)
8018 *total += COSTS_N_INSNS (1);
8020 /* We don't have the necessary insn, so we need to perform some
8021 other operation. */
8022 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8023 /* An and with constant 255. */
8024 *total += COSTS_N_INSNS (1);
8025 else
8026 /* A shift sequence. Increase costs slightly to avoid
8027 combining two shifts into an extend operation. */
8028 *total += COSTS_N_INSNS (2) + 1;
8031 return false;
8034 switch (GET_MODE (XEXP (x, 0)))
8036 case V8QImode:
8037 case V4HImode:
8038 case V2SImode:
8039 case V4QImode:
8040 case V2HImode:
8041 *total = COSTS_N_INSNS (1);
8042 return false;
8044 default:
8045 gcc_unreachable ();
8047 gcc_unreachable ();
8049 case ZERO_EXTRACT:
8050 case SIGN_EXTRACT:
8051 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8052 return true;
8054 case CONST_INT:
8055 if (const_ok_for_arm (INTVAL (x))
8056 || const_ok_for_arm (~INTVAL (x)))
8057 *total = COSTS_N_INSNS (1);
8058 else
8059 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8060 INTVAL (x), NULL_RTX,
8061 NULL_RTX, 0, 0));
8062 return true;
8064 case CONST:
8065 case LABEL_REF:
8066 case SYMBOL_REF:
8067 *total = COSTS_N_INSNS (3);
8068 return true;
8070 case HIGH:
8071 *total = COSTS_N_INSNS (1);
8072 return true;
8074 case LO_SUM:
8075 *total = COSTS_N_INSNS (1);
8076 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8077 return true;
8079 case CONST_DOUBLE:
8080 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8081 && (mode == SFmode || !TARGET_VFP_SINGLE))
8082 *total = COSTS_N_INSNS (1);
8083 else
8084 *total = COSTS_N_INSNS (4);
8085 return true;
8087 case SET:
8088 /* The vec_extract patterns accept memory operands that require an
8089 address reload. Account for the cost of that reload to give the
8090 auto-inc-dec pass an incentive to try to replace them. */
8091 if (TARGET_NEON && MEM_P (SET_DEST (x))
8092 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8094 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8095 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8096 *total += COSTS_N_INSNS (1);
8097 return true;
8099 /* Likewise for the vec_set patterns. */
8100 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8101 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8102 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8104 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8105 *total = rtx_cost (mem, code, 0, speed);
8106 if (!neon_vector_mem_operand (mem, 2, true))
8107 *total += COSTS_N_INSNS (1);
8108 return true;
8110 return false;
8112 case UNSPEC:
8113 /* We cost this as high as our memory costs so that it can
8114 be hoisted out of loops. */
8115 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8117 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8119 return true;
8121 case CONST_VECTOR:
8122 if (TARGET_NEON
8123 && TARGET_HARD_FLOAT
8124 && outer == SET
8125 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8126 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8127 *total = COSTS_N_INSNS (1);
8128 else
8129 *total = COSTS_N_INSNS (4);
8130 return true;
8132 default:
8133 *total = COSTS_N_INSNS (4);
8134 return false;
8138 /* Estimate the size cost of Thumb-1 instructions.
8139 For now most of the code is copied from thumb1_rtx_costs. We need more
8140 fine-grained tuning when we have more related test cases. */
8141 static inline int
8142 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8144 enum machine_mode mode = GET_MODE (x);
8145 int words;
8147 switch (code)
8149 case ASHIFT:
8150 case ASHIFTRT:
8151 case LSHIFTRT:
8152 case ROTATERT:
8153 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8155 case PLUS:
8156 case MINUS:
8157 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8158 patterns generated by RTL expansion, especially for the expansion of
8159 multiplication. */
8160 if ((GET_CODE (XEXP (x, 0)) == MULT
8161 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8162 || (GET_CODE (XEXP (x, 1)) == MULT
8163 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8164 return COSTS_N_INSNS (2);
8165 /* Deliberately fall through for normal RTX. */
8166 case COMPARE:
8167 case NEG:
8168 case NOT:
8169 return COSTS_N_INSNS (1);
8171 case MULT:
8172 if (CONST_INT_P (XEXP (x, 1)))
8174 /* The Thumb-1 mul instruction can't operate on a constant. We must
8175 load it into a register first. */
8176 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8177 return COSTS_N_INSNS (1) + const_size;
8179 return COSTS_N_INSNS (1);
8181 case SET:
8182 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8183 the mode. */
8184 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8185 return (COSTS_N_INSNS (words)
8186 + 4 * ((MEM_P (SET_SRC (x)))
8187 + MEM_P (SET_DEST (x))));
8189 case CONST_INT:
8190 if (outer == SET)
8192 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8193 return COSTS_N_INSNS (1);
8194 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8195 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8196 return COSTS_N_INSNS (2);
8197 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8198 if (thumb_shiftable_const (INTVAL (x)))
8199 return COSTS_N_INSNS (2);
8200 return COSTS_N_INSNS (3);
8202 else if ((outer == PLUS || outer == COMPARE)
8203 && INTVAL (x) < 256 && INTVAL (x) > -256)
8204 return 0;
8205 else if ((outer == IOR || outer == XOR || outer == AND)
8206 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8207 return COSTS_N_INSNS (1);
8208 else if (outer == AND)
8210 int i;
8211 /* This duplicates the tests in the andsi3 expander. */
8212 for (i = 9; i <= 31; i++)
8213 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8214 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8215 return COSTS_N_INSNS (2);
8217 else if (outer == ASHIFT || outer == ASHIFTRT
8218 || outer == LSHIFTRT)
8219 return 0;
8220 return COSTS_N_INSNS (2);
8222 case CONST:
8223 case CONST_DOUBLE:
8224 case LABEL_REF:
8225 case SYMBOL_REF:
8226 return COSTS_N_INSNS (3);
8228 case UDIV:
8229 case UMOD:
8230 case DIV:
8231 case MOD:
8232 return 100;
8234 case TRUNCATE:
8235 return 99;
8237 case AND:
8238 case XOR:
8239 case IOR:
8240 /* XXX guess. */
8241 return 8;
8243 case MEM:
8244 /* XXX another guess. */
8245 /* Memory costs quite a lot for the first word, but subsequent words
8246 load at the equivalent of a single insn each. */
8247 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8248 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8249 ? 4 : 0));
8251 case IF_THEN_ELSE:
8252 /* XXX a guess. */
8253 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8254 return 14;
8255 return 2;
8257 case ZERO_EXTEND:
8258 /* XXX still guessing. */
8259 switch (GET_MODE (XEXP (x, 0)))
8261 case QImode:
8262 return (1 + (mode == DImode ? 4 : 0)
8263 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8265 case HImode:
8266 return (4 + (mode == DImode ? 4 : 0)
8267 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8269 case SImode:
8270 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8272 default:
8273 return 99;
8276 default:
8277 return 99;
8281 /* RTX costs when optimizing for size. */
8282 static bool
8283 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8284 int *total)
8286 enum machine_mode mode = GET_MODE (x);
8287 if (TARGET_THUMB1)
8289 *total = thumb1_size_rtx_costs (x, code, outer_code);
8290 return true;
8293 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8294 switch (code)
8296 case MEM:
8297 /* A memory access costs 1 insn if the mode is small or the address is
8298 a single register; otherwise it costs one insn per word. */
8299 if (REG_P (XEXP (x, 0)))
8300 *total = COSTS_N_INSNS (1);
8301 else if (flag_pic
8302 && GET_CODE (XEXP (x, 0)) == PLUS
8303 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8304 /* This will be split into two instructions.
8305 See arm.md:calculate_pic_address. */
8306 *total = COSTS_N_INSNS (2);
8307 else
8308 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8309 return true;
8311 case DIV:
8312 case MOD:
8313 case UDIV:
8314 case UMOD:
8315 /* Needs a libcall, so it costs about this. */
8316 *total = COSTS_N_INSNS (2);
8317 return false;
8319 case ROTATE:
8320 if (mode == SImode && REG_P (XEXP (x, 1)))
8322 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8323 return true;
8325 /* Fall through */
8326 case ROTATERT:
8327 case ASHIFT:
8328 case LSHIFTRT:
8329 case ASHIFTRT:
8330 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8332 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8333 return true;
8335 else if (mode == SImode)
8337 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8338 /* Slightly disparage register shifts, but not by much. */
8339 if (!CONST_INT_P (XEXP (x, 1)))
8340 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8341 return true;
8344 /* Needs a libcall. */
8345 *total = COSTS_N_INSNS (2);
8346 return false;
8348 case MINUS:
8349 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8350 && (mode == SFmode || !TARGET_VFP_SINGLE))
8352 *total = COSTS_N_INSNS (1);
8353 return false;
8356 if (mode == SImode)
8358 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8359 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8361 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8362 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8363 || subcode1 == ROTATE || subcode1 == ROTATERT
8364 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8365 || subcode1 == ASHIFTRT)
8367 /* It's just the cost of the two operands. */
8368 *total = 0;
8369 return false;
8372 *total = COSTS_N_INSNS (1);
8373 return false;
8376 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8377 return false;
8379 case PLUS:
8380 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8381 && (mode == SFmode || !TARGET_VFP_SINGLE))
8383 *total = COSTS_N_INSNS (1);
8384 return false;
8387 /* A shift as a part of ADD costs nothing. */
8388 if (GET_CODE (XEXP (x, 0)) == MULT
8389 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8391 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8392 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8393 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8394 return true;
8397 /* Fall through */
8398 case AND: case XOR: case IOR:
8399 if (mode == SImode)
8401 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8403 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8404 || subcode == LSHIFTRT || subcode == ASHIFTRT
8405 || (code == AND && subcode == NOT))
8407 /* It's just the cost of the two operands. */
8408 *total = 0;
8409 return false;
8413 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8414 return false;
8416 case MULT:
8417 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8418 return false;
8420 case NEG:
8421 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8422 && (mode == SFmode || !TARGET_VFP_SINGLE))
8424 *total = COSTS_N_INSNS (1);
8425 return false;
8428 /* Fall through */
8429 case NOT:
8430 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8432 return false;
8434 case IF_THEN_ELSE:
8435 *total = 0;
8436 return false;
8438 case COMPARE:
8439 if (cc_register (XEXP (x, 0), VOIDmode))
8440 * total = 0;
8441 else
8442 *total = COSTS_N_INSNS (1);
8443 return false;
8445 case ABS:
8446 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8447 && (mode == SFmode || !TARGET_VFP_SINGLE))
8448 *total = COSTS_N_INSNS (1);
8449 else
8450 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8451 return false;
8453 case SIGN_EXTEND:
8454 case ZERO_EXTEND:
8455 return arm_rtx_costs_1 (x, outer_code, total, 0);
8457 case CONST_INT:
8458 if (const_ok_for_arm (INTVAL (x)))
8459 /* A multiplication by a constant requires another instruction
8460 to load the constant to a register. */
8461 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8462 ? 1 : 0);
8463 else if (const_ok_for_arm (~INTVAL (x)))
8464 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8465 else if (const_ok_for_arm (-INTVAL (x)))
8467 if (outer_code == COMPARE || outer_code == PLUS
8468 || outer_code == MINUS)
8469 *total = 0;
8470 else
8471 *total = COSTS_N_INSNS (1);
8473 else
8474 *total = COSTS_N_INSNS (2);
8475 return true;
8477 case CONST:
8478 case LABEL_REF:
8479 case SYMBOL_REF:
8480 *total = COSTS_N_INSNS (2);
8481 return true;
8483 case CONST_DOUBLE:
8484 *total = COSTS_N_INSNS (4);
8485 return true;
8487 case CONST_VECTOR:
8488 if (TARGET_NEON
8489 && TARGET_HARD_FLOAT
8490 && outer_code == SET
8491 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8492 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8493 *total = COSTS_N_INSNS (1);
8494 else
8495 *total = COSTS_N_INSNS (4);
8496 return true;
8498 case HIGH:
8499 case LO_SUM:
8500 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8501 cost of these slightly. */
8502 *total = COSTS_N_INSNS (1) + 1;
8503 return true;
8505 case SET:
8506 return false;
8508 default:
8509 if (mode != VOIDmode)
8510 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8511 else
8512 *total = COSTS_N_INSNS (4); /* Who knows? */
8513 return false;
8517 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8518 operand, then return the operand that is being shifted. If the shift
8519 is not by a constant, then set SHIFT_REG to point to the operand.
8520 Return NULL if OP is not a shifter operand. */
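/* For instance (purely illustrative), (mult (reg X) (const_int 8)) is
   treated as a left shift by 3 and (reg X) is returned with *SHIFT_REG
   untouched, whereas (ashift (reg X) (reg Y)) returns (reg X) and sets
   *SHIFT_REG to (reg Y). */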
8521 static rtx
8522 shifter_op_p (rtx op, rtx *shift_reg)
8524 enum rtx_code code = GET_CODE (op);
8526 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8527 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8528 return XEXP (op, 0);
8529 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8530 return XEXP (op, 0);
8531 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8532 || code == ASHIFTRT)
8534 if (!CONST_INT_P (XEXP (op, 1)))
8535 *shift_reg = XEXP (op, 1);
8536 return XEXP (op, 0);
8539 return NULL;
8542 static bool
8543 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8545 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8546 gcc_assert (GET_CODE (x) == UNSPEC);
8548 switch (XINT (x, 1))
8550 case UNSPEC_UNALIGNED_LOAD:
8551 /* We can only do unaligned loads into the integer unit, and we can't
8552 use LDM or LDRD. */
8553 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8554 if (speed_p)
8555 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8556 + extra_cost->ldst.load_unaligned);
8558 #ifdef NOT_YET
8559 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8560 ADDR_SPACE_GENERIC, speed_p);
8561 #endif
8562 return true;
8564 case UNSPEC_UNALIGNED_STORE:
8565 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8566 if (speed_p)
8567 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
8568 + extra_cost->ldst.store_unaligned);
8570 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
8571 #ifdef NOT_YET
8572 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8573 ADDR_SPACE_GENERIC, speed_p);
8574 #endif
8575 return true;
8577 case UNSPEC_VRINTZ:
8578 case UNSPEC_VRINTP:
8579 case UNSPEC_VRINTM:
8580 case UNSPEC_VRINTR:
8581 case UNSPEC_VRINTX:
8582 case UNSPEC_VRINTA:
8583 *cost = COSTS_N_INSNS (1);
8584 if (speed_p)
8585 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
8587 return true;
8588 default:
8589 *cost = COSTS_N_INSNS (2);
8590 break;
8592 return false;
8595 /* Cost of a libcall. We assume one insn per argument, an amount for the
8596 call (one insn for -Os) and then one for processing the result. */
8597 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
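/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size. */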
8599 /* RTX costs. Make an estimate of the cost of executing the operation
8600 X, which is contained within an operation with code OUTER_CODE.
8601 SPEED_P indicates whether the cost desired is the performance cost,
8602 or the size cost. The estimate is stored in COST and the return
8603 value is TRUE if the cost calculation is final, or FALSE if the
8604 caller should recurse through the operands of X to add additional
8605 costs.
8607 We currently make no attempt to model the size savings of Thumb-2
8608 16-bit instructions. At the normal points in compilation where
8609 this code is called we have no measure of whether the condition
8610 flags are live or not, and thus no realistic way to determine what
8611 the size will eventually be. */
8612 static bool
8613 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8614 const struct cpu_cost_table *extra_cost,
8615 int *cost, bool speed_p)
8617 enum machine_mode mode = GET_MODE (x);
8619 if (TARGET_THUMB1)
8621 if (speed_p)
8622 *cost = thumb1_rtx_costs (x, code, outer_code);
8623 else
8624 *cost = thumb1_size_rtx_costs (x, code, outer_code);
8625 return true;
8628 switch (code)
8630 case SET:
8631 *cost = 0;
8632 if (REG_P (SET_SRC (x))
8633 && REG_P (SET_DEST (x)))
8635 /* Assume that most copies can be done with a single insn,
8636 unless we don't have HW FP, in which case everything
8637 larger than word mode will require two insns. */
8638 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8639 && GET_MODE_SIZE (mode) > 4)
8640 || mode == DImode)
8641 ? 2 : 1);
8642 /* Conditional register moves can be encoded
8643 in 16 bits in Thumb mode. */
8644 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
8645 *cost >>= 1;
8648 if (CONST_INT_P (SET_SRC (x)))
8650 /* Handle CONST_INT here, since the value doesn't have a mode
8651 and we would otherwise be unable to work out the true cost. */
8652 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
8653 mode = GET_MODE (SET_DEST (x));
8654 outer_code = SET;
8655 /* Slightly lower the cost of setting a core reg to a constant.
8656 This helps break up chains and allows for better scheduling. */
8657 if (REG_P (SET_DEST (x))
8658 && REGNO (SET_DEST (x)) <= LR_REGNUM)
8659 *cost -= 1;
8660 x = SET_SRC (x);
8661 /* Immediate moves with an immediate in the range [0, 255] can be
8662 encoded in 16 bits in Thumb mode. */
8663 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
8664 && INTVAL (x) >= 0 && INTVAL (x) <=255)
8665 *cost >>= 1;
8666 goto const_int_cost;
8669 return false;
8671 case MEM:
8672 /* A memory access costs 1 insn if the mode is small or the address is
8673 a single register; otherwise it costs one insn per word. */
8674 if (REG_P (XEXP (x, 0)))
8675 *cost = COSTS_N_INSNS (1);
8676 else if (flag_pic
8677 && GET_CODE (XEXP (x, 0)) == PLUS
8678 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8679 /* This will be split into two instructions.
8680 See arm.md:calculate_pic_address. */
8681 *cost = COSTS_N_INSNS (2);
8682 else
8683 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8685 /* For speed optimizations, add the costs of the address and
8686 accessing memory. */
8687 if (speed_p)
8688 #ifdef NOT_YET
8689 *cost += (extra_cost->ldst.load
8690 + arm_address_cost (XEXP (x, 0), mode,
8691 ADDR_SPACE_GENERIC, speed_p));
8692 #else
8693 *cost += extra_cost->ldst.load;
8694 #endif
8695 return true;
8697 case PARALLEL:
8699 /* Calculations of LDM costs are complex. We assume an initial cost
8700 (ldm_1st) which will load the number of registers mentioned in
8701 ldm_regs_per_insn_1st registers; then each additional
8702 ldm_regs_per_insn_subsequent registers cost one more insn. The
8703 formula for N regs is thus:
8705 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8706 + ldm_regs_per_insn_subsequent - 1)
8707 / ldm_regs_per_insn_subsequent).
8709 Additional costs may also be added for addressing. A similar
8710 formula is used for STM. */
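/* As a worked example with illustrative tuning values, if
   ldm_regs_per_insn_1st is 3 and ldm_regs_per_insn_subsequent is 2,
   an LDM of 7 registers is costed as
   ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2). */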
8712 bool is_ldm = load_multiple_operation (x, SImode);
8713 bool is_stm = store_multiple_operation (x, SImode);
8715 *cost = COSTS_N_INSNS (1);
8717 if (is_ldm || is_stm)
8719 if (speed_p)
8721 HOST_WIDE_INT nregs = XVECLEN (x, 0);
8722 HOST_WIDE_INT regs_per_insn_1st = is_ldm
8723 ? extra_cost->ldst.ldm_regs_per_insn_1st
8724 : extra_cost->ldst.stm_regs_per_insn_1st;
8725 HOST_WIDE_INT regs_per_insn_sub = is_ldm
8726 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
8727 : extra_cost->ldst.stm_regs_per_insn_subsequent;
8729 *cost += regs_per_insn_1st
8730 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
8731 + regs_per_insn_sub - 1)
8732 / regs_per_insn_sub);
8733 return true;
8737 return false;
8739 case DIV:
8740 case UDIV:
8741 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8742 && (mode == SFmode || !TARGET_VFP_SINGLE))
8743 *cost = COSTS_N_INSNS (speed_p
8744 ? extra_cost->fp[mode != SFmode].div : 1);
8745 else if (mode == SImode && TARGET_IDIV)
8746 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
8747 else
8748 *cost = LIBCALL_COST (2);
8749 return false; /* All arguments must be in registers. */
8751 case MOD:
8752 case UMOD:
8753 *cost = LIBCALL_COST (2);
8754 return false; /* All arguments must be in registers. */
8756 case ROTATE:
8757 if (mode == SImode && REG_P (XEXP (x, 1)))
8759 *cost = (COSTS_N_INSNS (2)
8760 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8761 if (speed_p)
8762 *cost += extra_cost->alu.shift_reg;
8763 return true;
8765 /* Fall through */
8766 case ROTATERT:
8767 case ASHIFT:
8768 case LSHIFTRT:
8769 case ASHIFTRT:
8770 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8772 *cost = (COSTS_N_INSNS (3)
8773 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8774 if (speed_p)
8775 *cost += 2 * extra_cost->alu.shift;
8776 return true;
8778 else if (mode == SImode)
8780 *cost = (COSTS_N_INSNS (1)
8781 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8782 /* Slightly disparage register shifts at -Os, but not by much. */
8783 if (!CONST_INT_P (XEXP (x, 1)))
8784 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8785 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
8786 return true;
8788 else if (GET_MODE_CLASS (mode) == MODE_INT
8789 && GET_MODE_SIZE (mode) < 4)
8791 if (code == ASHIFT)
8793 *cost = (COSTS_N_INSNS (1)
8794 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8795 /* Slightly disparage register shifts at -Os, but not by
8796 much. */
8797 if (!CONST_INT_P (XEXP (x, 1)))
8798 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8799 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
8801 else if (code == LSHIFTRT || code == ASHIFTRT)
8803 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
8805 /* Can use SBFX/UBFX. */
8806 *cost = COSTS_N_INSNS (1);
8807 if (speed_p)
8808 *cost += extra_cost->alu.bfx;
8809 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
8811 else
8813 *cost = COSTS_N_INSNS (2);
8814 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
8815 if (speed_p)
8817 if (CONST_INT_P (XEXP (x, 1)))
8818 *cost += 2 * extra_cost->alu.shift;
8819 else
8820 *cost += (extra_cost->alu.shift
8821 + extra_cost->alu.shift_reg);
8823 else
8824 /* Slightly disparage register shifts. */
8825 *cost += !CONST_INT_P (XEXP (x, 1));
8828 else /* Rotates. */
8830 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
8831 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
8832 if (speed_p)
8834 if (CONST_INT_P (XEXP (x, 1)))
8835 *cost += (2 * extra_cost->alu.shift
8836 + extra_cost->alu.log_shift);
8837 else
8838 *cost += (extra_cost->alu.shift
8839 + extra_cost->alu.shift_reg
8840 + extra_cost->alu.log_shift_reg);
8843 return true;
8846 *cost = LIBCALL_COST (2);
8847 return false;
8849 case MINUS:
8850 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8851 && (mode == SFmode || !TARGET_VFP_SINGLE))
8853 *cost = COSTS_N_INSNS (1);
8854 if (GET_CODE (XEXP (x, 0)) == MULT
8855 || GET_CODE (XEXP (x, 1)) == MULT)
8857 rtx mul_op0, mul_op1, sub_op;
8859 if (speed_p)
8860 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
8862 if (GET_CODE (XEXP (x, 0)) == MULT)
8864 mul_op0 = XEXP (XEXP (x, 0), 0);
8865 mul_op1 = XEXP (XEXP (x, 0), 1);
8866 sub_op = XEXP (x, 1);
8868 else
8870 mul_op0 = XEXP (XEXP (x, 1), 0);
8871 mul_op1 = XEXP (XEXP (x, 1), 1);
8872 sub_op = XEXP (x, 0);
8875 /* The first operand of the multiply may be optionally
8876 negated. */
8877 if (GET_CODE (mul_op0) == NEG)
8878 mul_op0 = XEXP (mul_op0, 0);
8880 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
8881 + rtx_cost (mul_op1, code, 0, speed_p)
8882 + rtx_cost (sub_op, code, 0, speed_p));
8884 return true;
8887 if (speed_p)
8888 *cost += extra_cost->fp[mode != SFmode].addsub;
8889 return false;
8892 if (mode == SImode)
8894 rtx shift_by_reg = NULL;
8895 rtx shift_op;
8896 rtx non_shift_op;
8898 *cost = COSTS_N_INSNS (1);
8900 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
8901 if (shift_op == NULL)
8903 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
8904 non_shift_op = XEXP (x, 0);
8906 else
8907 non_shift_op = XEXP (x, 1);
8909 if (shift_op != NULL)
8911 if (shift_by_reg != NULL)
8913 if (speed_p)
8914 *cost += extra_cost->alu.arith_shift_reg;
8915 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
8917 else if (speed_p)
8918 *cost += extra_cost->alu.arith_shift;
8920 *cost += (rtx_cost (shift_op, code, 0, speed_p)
8921 + rtx_cost (non_shift_op, code, 0, speed_p));
8922 return true;
8925 if (arm_arch_thumb2
8926 && GET_CODE (XEXP (x, 1)) == MULT)
8928 /* MLS. */
8929 if (speed_p)
8930 *cost += extra_cost->mult[0].add;
8931 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
8932 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
8933 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
8934 return true;
8937 if (CONST_INT_P (XEXP (x, 0)))
8939 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
8940 INTVAL (XEXP (x, 0)), NULL_RTX,
8941 NULL_RTX, 1, 0);
8942 *cost = COSTS_N_INSNS (insns);
8943 if (speed_p)
8944 *cost += insns * extra_cost->alu.arith;
8945 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
8946 return true;
8949 return false;
8952 if (GET_MODE_CLASS (mode) == MODE_INT
8953 && GET_MODE_SIZE (mode) < 4)
8955 /* Slightly disparage, as we might need to widen the result. */
8956 *cost = 1 + COSTS_N_INSNS (1);
8957 if (speed_p)
8958 *cost += extra_cost->alu.arith;
8960 if (CONST_INT_P (XEXP (x, 0)))
8962 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
8963 return true;
8966 return false;
8969 if (mode == DImode)
8971 *cost = COSTS_N_INSNS (2);
8973 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
8975 rtx op1 = XEXP (x, 1);
8977 if (speed_p)
8978 *cost += 2 * extra_cost->alu.arith;
8980 if (GET_CODE (op1) == ZERO_EXTEND)
8981 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
8982 else
8983 *cost += rtx_cost (op1, MINUS, 1, speed_p);
8984 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
8985 0, speed_p);
8986 return true;
8988 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
8990 if (speed_p)
8991 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
8992 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
8993 0, speed_p)
8994 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
8995 return true;
8997 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
8998 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9000 if (speed_p)
9001 *cost += (extra_cost->alu.arith
9002 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9003 ? extra_cost->alu.arith
9004 : extra_cost->alu.arith_shift));
9005 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9006 + rtx_cost (XEXP (XEXP (x, 1), 0),
9007 GET_CODE (XEXP (x, 1)), 0, speed_p));
9008 return true;
9011 if (speed_p)
9012 *cost += 2 * extra_cost->alu.arith;
9013 return false;
9016 /* Vector mode? */
9018 *cost = LIBCALL_COST (2);
9019 return false;
9021 case PLUS:
9022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9023 && (mode == SFmode || !TARGET_VFP_SINGLE))
9025 *cost = COSTS_N_INSNS (1);
9026 if (GET_CODE (XEXP (x, 0)) == MULT)
9028 rtx mul_op0, mul_op1, add_op;
9030 if (speed_p)
9031 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9033 mul_op0 = XEXP (XEXP (x, 0), 0);
9034 mul_op1 = XEXP (XEXP (x, 0), 1);
9035 add_op = XEXP (x, 1);
9037 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9038 + rtx_cost (mul_op1, code, 0, speed_p)
9039 + rtx_cost (add_op, code, 0, speed_p));
9041 return true;
9044 if (speed_p)
9045 *cost += extra_cost->fp[mode != SFmode].addsub;
9046 return false;
9048 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9050 *cost = LIBCALL_COST (2);
9051 return false;
9054 if (GET_MODE_CLASS (mode) == MODE_INT
9055 && GET_MODE_SIZE (mode) < 4)
9057 /* Narrow modes can be synthesized in SImode, but the range
9058 of useful sub-operations is limited. */
9059 if (CONST_INT_P (XEXP (x, 1)))
9061 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9062 INTVAL (XEXP (x, 1)), NULL_RTX,
9063 NULL_RTX, 1, 0);
9064 *cost = COSTS_N_INSNS (insns);
9065 if (speed_p)
9066 *cost += insns * extra_cost->alu.arith;
9067 /* Slightly penalize a narrow operation as the result may
9068 need widening. */
9069 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9070 return true;
9073 /* Slightly penalize a narrow operation as the result may
9074 need widening. */
9075 *cost = 1 + COSTS_N_INSNS (1);
9076 if (speed_p)
9077 *cost += extra_cost->alu.arith;
9079 return false;
9082 if (mode == SImode)
9084 rtx shift_op, shift_reg;
9086 *cost = COSTS_N_INSNS (1);
9087 if (TARGET_INT_SIMD
9088 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9089 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9091 /* UXTA[BH] or SXTA[BH]. */
9092 if (speed_p)
9093 *cost += extra_cost->alu.extnd_arith;
9094 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9095 speed_p)
9096 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9097 return true;
9100 shift_reg = NULL;
9101 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9102 if (shift_op != NULL)
9104 if (shift_reg)
9106 if (speed_p)
9107 *cost += extra_cost->alu.arith_shift_reg;
9108 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9110 else if (speed_p)
9111 *cost += extra_cost->alu.arith_shift;
9113 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9114 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9115 return true;
9117 if (GET_CODE (XEXP (x, 0)) == MULT)
9119 rtx mul_op = XEXP (x, 0);
9121 *cost = COSTS_N_INSNS (1);
9123 if (TARGET_DSP_MULTIPLY
9124 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9125 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9126 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9127 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9128 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9129 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9130 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9131 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9132 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9133 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9134 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9135 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9136 == 16))))))
9138 /* SMLA[BT][BT]. */
9139 if (speed_p)
9140 *cost += extra_cost->mult[0].extend_add;
9141 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9142 SIGN_EXTEND, 0, speed_p)
9143 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9144 SIGN_EXTEND, 0, speed_p)
9145 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9146 return true;
9149 if (speed_p)
9150 *cost += extra_cost->mult[0].add;
9151 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9152 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9153 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9154 return true;
9156 if (CONST_INT_P (XEXP (x, 1)))
9158 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9159 INTVAL (XEXP (x, 1)), NULL_RTX,
9160 NULL_RTX, 1, 0);
9161 *cost = COSTS_N_INSNS (insns);
9162 if (speed_p)
9163 *cost += insns * extra_cost->alu.arith;
9164 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9165 return true;
9167 return false;
9170 if (mode == DImode)
9172 if (arm_arch3m
9173 && GET_CODE (XEXP (x, 0)) == MULT
9174 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9175 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9176 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9177 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9179 *cost = COSTS_N_INSNS (1);
9180 if (speed_p)
9181 *cost += extra_cost->mult[1].extend_add;
9182 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9183 ZERO_EXTEND, 0, speed_p)
9184 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9185 ZERO_EXTEND, 0, speed_p)
9186 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9187 return true;
9190 *cost = COSTS_N_INSNS (2);
9192 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9193 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9195 if (speed_p)
9196 *cost += (extra_cost->alu.arith
9197 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9198 ? extra_cost->alu.arith
9199 : extra_cost->alu.arith_shift));
9201 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9202 speed_p)
9203 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9204 return true;
9207 if (speed_p)
9208 *cost += 2 * extra_cost->alu.arith;
9209 return false;
9212 /* Vector mode? */
9213 *cost = LIBCALL_COST (2);
9214 return false;
9216 case AND: case XOR: case IOR:
9217 if (mode == SImode)
9219 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9220 rtx op0 = XEXP (x, 0);
9221 rtx shift_op, shift_reg;
9223 *cost = COSTS_N_INSNS (1);
9225 if (subcode == NOT
9226 && (code == AND
9227 || (code == IOR && TARGET_THUMB2)))
9228 op0 = XEXP (op0, 0);
9230 shift_reg = NULL;
9231 shift_op = shifter_op_p (op0, &shift_reg);
9232 if (shift_op != NULL)
9234 if (shift_reg)
9236 if (speed_p)
9237 *cost += extra_cost->alu.log_shift_reg;
9238 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9240 else if (speed_p)
9241 *cost += extra_cost->alu.log_shift;
9243 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9244 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9245 return true;
9248 if (CONST_INT_P (XEXP (x, 1)))
9250 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9251 INTVAL (XEXP (x, 1)), NULL_RTX,
9252 NULL_RTX, 1, 0);
9254 *cost = COSTS_N_INSNS (insns);
9255 if (speed_p)
9256 *cost += insns * extra_cost->alu.logical;
9257 *cost += rtx_cost (op0, code, 0, speed_p);
9258 return true;
9261 if (speed_p)
9262 *cost += extra_cost->alu.logical;
9263 *cost += (rtx_cost (op0, code, 0, speed_p)
9264 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9265 return true;
9268 if (mode == DImode)
9270 rtx op0 = XEXP (x, 0);
9271 enum rtx_code subcode = GET_CODE (op0);
9273 *cost = COSTS_N_INSNS (2);
9275 if (subcode == NOT
9276 && (code == AND
9277 || (code == IOR && TARGET_THUMB2)))
9278 op0 = XEXP (op0, 0);
9280 if (GET_CODE (op0) == ZERO_EXTEND)
9282 if (speed_p)
9283 *cost += 2 * extra_cost->alu.logical;
9285 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9286 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9287 return true;
9289 else if (GET_CODE (op0) == SIGN_EXTEND)
9291 if (speed_p)
9292 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9294 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9295 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9296 return true;
9299 if (speed_p)
9300 *cost += 2 * extra_cost->alu.logical;
9302 return true;
9304 /* Vector mode? */
9306 *cost = LIBCALL_COST (2);
9307 return false;
9309 case MULT:
9310 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9311 && (mode == SFmode || !TARGET_VFP_SINGLE))
9313 rtx op0 = XEXP (x, 0);
9315 *cost = COSTS_N_INSNS (1);
9317 if (GET_CODE (op0) == NEG)
9318 op0 = XEXP (op0, 0);
9320 if (speed_p)
9321 *cost += extra_cost->fp[mode != SFmode].mult;
9323 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9324 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9325 return true;
9327 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9329 *cost = LIBCALL_COST (2);
9330 return false;
9333 if (mode == SImode)
9335 *cost = COSTS_N_INSNS (1);
9336 if (TARGET_DSP_MULTIPLY
9337 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9338 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9339 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9340 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9341 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9342 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9343 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9344 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9345 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9346 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9347 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9348 && (INTVAL (XEXP (XEXP (x, 1), 1))
9349 == 16))))))
9351 /* SMUL[TB][TB]. */
9352 if (speed_p)
9353 *cost += extra_cost->mult[0].extend;
9354 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9355 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9356 return true;
9358 if (speed_p)
9359 *cost += extra_cost->mult[0].simple;
9360 return false;
9363 if (mode == DImode)
9365 if (arm_arch3m
9366 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9367 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9368 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9369 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9371 *cost = COSTS_N_INSNS (1);
9372 if (speed_p)
9373 *cost += extra_cost->mult[1].extend;
9374 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9375 ZERO_EXTEND, 0, speed_p)
9376 + rtx_cost (XEXP (XEXP (x, 1), 0),
9377 ZERO_EXTEND, 0, speed_p));
9378 return true;
9381 *cost = LIBCALL_COST (2);
9382 return false;
9385 /* Vector mode? */
9386 *cost = LIBCALL_COST (2);
9387 return false;
9389 case NEG:
9390 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9391 && (mode == SFmode || !TARGET_VFP_SINGLE))
9393 *cost = COSTS_N_INSNS (1);
9394 if (speed_p)
9395 *cost += extra_cost->fp[mode != SFmode].neg;
9397 return false;
9399 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9401 *cost = LIBCALL_COST (1);
9402 return false;
9405 if (mode == SImode)
9407 if (GET_CODE (XEXP (x, 0)) == ABS)
9409 *cost = COSTS_N_INSNS (2);
9410 /* Assume the non-flag-changing variant. */
9411 if (speed_p)
9412 *cost += (extra_cost->alu.log_shift
9413 + extra_cost->alu.arith_shift);
9414 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9415 return true;
9418 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9419 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9421 *cost = COSTS_N_INSNS (2);
9422 /* No extra cost for MOV imm and MVN imm. */
9423 /* If the comparison op is using the flags, there's no further
9424 cost; otherwise we need to add the cost of the comparison. */
9425 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9426 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9427 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9429 *cost += (COSTS_N_INSNS (1)
9430 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9431 speed_p)
9432 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9433 speed_p));
9434 if (speed_p)
9435 *cost += extra_cost->alu.arith;
9437 return true;
9439 *cost = COSTS_N_INSNS (1);
9440 if (speed_p)
9441 *cost += extra_cost->alu.arith;
9442 return false;
9445 if (GET_MODE_CLASS (mode) == MODE_INT
9446 && GET_MODE_SIZE (mode) < 4)
9448 /* Slightly disparage, as we might need an extend operation. */
9449 *cost = 1 + COSTS_N_INSNS (1);
9450 if (speed_p)
9451 *cost += extra_cost->alu.arith;
9452 return false;
9455 if (mode == DImode)
9457 *cost = COSTS_N_INSNS (2);
9458 if (speed_p)
9459 *cost += 2 * extra_cost->alu.arith;
9460 return false;
9463 /* Vector mode? */
9464 *cost = LIBCALL_COST (1);
9465 return false;
9467 case NOT:
9468 if (mode == SImode)
9470 rtx shift_op;
9471 rtx shift_reg = NULL;
9473 *cost = COSTS_N_INSNS (1);
9474 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9476 if (shift_op)
9478 if (shift_reg != NULL)
9480 if (speed_p)
9481 *cost += extra_cost->alu.log_shift_reg;
9482 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9484 else if (speed_p)
9485 *cost += extra_cost->alu.log_shift;
9486 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9487 return true;
9490 if (speed_p)
9491 *cost += extra_cost->alu.logical;
9492 return false;
9494 if (mode == DImode)
9496 *cost = COSTS_N_INSNS (2);
9497 return false;
9500 /* Vector mode? */
9502 *cost += LIBCALL_COST (1);
9503 return false;
9505 case IF_THEN_ELSE:
9507 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9509 *cost = COSTS_N_INSNS (4);
9510 return true;
9512 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9513 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9515 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9516 /* Assume that if one arm of the if_then_else is a register,
9517 it will be tied to the result and the conditional insn will
9518 be eliminated. */
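/* E.g. for (if_then_else (cond) (reg) (const_int 5)) only the cost of
   the constant arm is added below (an illustrative case). */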
9519 if (REG_P (XEXP (x, 1)))
9520 *cost += op2cost;
9521 else if (REG_P (XEXP (x, 2)))
9522 *cost += op1cost;
9523 else
9525 if (speed_p)
9527 if (extra_cost->alu.non_exec_costs_exec)
9528 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
9529 else
9530 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
9532 else
9533 *cost += op1cost + op2cost;
9536 return true;
9538 case COMPARE:
9539 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
9540 *cost = 0;
9541 else
9543 enum machine_mode op0mode;
9544 /* We'll mostly assume that the cost of a compare is the cost of the
9545 LHS. However, there are some notable exceptions. */
9547 /* Floating point compares are never done as side-effects. */
9548 op0mode = GET_MODE (XEXP (x, 0));
9549 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
9550 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
9552 *cost = COSTS_N_INSNS (1);
9553 if (speed_p)
9554 *cost += extra_cost->fp[op0mode != SFmode].compare;
9556 if (XEXP (x, 1) == CONST0_RTX (op0mode))
9558 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9559 return true;
9562 return false;
9564 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
9566 *cost = LIBCALL_COST (2);
9567 return false;
9570 /* DImode compares normally take two insns. */
9571 if (op0mode == DImode)
9573 *cost = COSTS_N_INSNS (2);
9574 if (speed_p)
9575 *cost += 2 * extra_cost->alu.arith;
9576 return false;
9579 if (op0mode == SImode)
9581 rtx shift_op;
9582 rtx shift_reg;
9584 if (XEXP (x, 1) == const0_rtx
9585 && !(REG_P (XEXP (x, 0))
9586 || (GET_CODE (XEXP (x, 0)) == SUBREG
9587 && REG_P (SUBREG_REG (XEXP (x, 0))))))
9589 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
9591 /* Multiply operations that set the flags are often
9592 significantly more expensive. */
9593 if (speed_p
9594 && GET_CODE (XEXP (x, 0)) == MULT
9595 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
9596 *cost += extra_cost->mult[0].flag_setting;
9598 if (speed_p
9599 && GET_CODE (XEXP (x, 0)) == PLUS
9600 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9601 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
9602 0), 1), mode))
9603 *cost += extra_cost->mult[0].flag_setting;
9604 return true;
9607 shift_reg = NULL;
9608 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9609 if (shift_op != NULL)
9611 *cost = COSTS_N_INSNS (1);
9612 if (shift_reg != NULL)
9614 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9615 if (speed_p)
9616 *cost += extra_cost->alu.arith_shift_reg;
9618 else if (speed_p)
9619 *cost += extra_cost->alu.arith_shift;
9620 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9621 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
9622 return true;
9625 *cost = COSTS_N_INSNS (1);
9626 if (speed_p)
9627 *cost += extra_cost->alu.arith;
9628 if (CONST_INT_P (XEXP (x, 1))
9629 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9631 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
9632 return true;
9634 return false;
9637 /* Vector mode? */
9639 *cost = LIBCALL_COST (2);
9640 return false;
9642 return true;
9644 case EQ:
9645 case NE:
9646 case LT:
9647 case LE:
9648 case GT:
9649 case GE:
9650 case LTU:
9651 case LEU:
9652 case GEU:
9653 case GTU:
9654 case ORDERED:
9655 case UNORDERED:
9656 case UNEQ:
9657 case UNLE:
9658 case UNLT:
9659 case UNGE:
9660 case UNGT:
9661 case LTGT:
9662 if (outer_code == SET)
9664 /* Is it a store-flag operation? */
9665 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9666 && XEXP (x, 1) == const0_rtx)
9668 /* Thumb also needs an IT insn. */
9669 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
9670 return true;
9672 if (XEXP (x, 1) == const0_rtx)
9674 switch (code)
9676 case LT:
9677 /* LSR Rd, Rn, #31. */
9678 *cost = COSTS_N_INSNS (1);
9679 if (speed_p)
9680 *cost += extra_cost->alu.shift;
9681 break;
9683 case EQ:
9684 /* RSBS T1, Rn, #0
9685 ADC Rd, Rn, T1. */
9687 case NE:
9688 /* SUBS T1, Rn, #1
9689 SBC Rd, Rn, T1. */
9690 *cost = COSTS_N_INSNS (2);
9691 break;
9693 case LE:
9694 /* RSBS T1, Rn, Rn, LSR #31
9695 ADC Rd, Rn, T1. */
9696 *cost = COSTS_N_INSNS (2);
9697 if (speed_p)
9698 *cost += extra_cost->alu.arith_shift;
9699 break;
9701 case GT:
9702 /* RSB Rd, Rn, Rn, ASR #1
9703 LSR Rd, Rd, #31. */
9704 *cost = COSTS_N_INSNS (2);
9705 if (speed_p)
9706 *cost += (extra_cost->alu.arith_shift
9707 + extra_cost->alu.shift);
9708 break;
9710 case GE:
9711 /* ASR Rd, Rn, #31
9712 ADD Rd, Rn, #1. */
9713 *cost = COSTS_N_INSNS (2);
9714 if (speed_p)
9715 *cost += extra_cost->alu.shift;
9716 break;
9718 default:
9719 /* Remaining cases are either meaningless or would take
9720 three insns anyway. */
9721 *cost = COSTS_N_INSNS (3);
9722 break;
9724 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9725 return true;
9727 else
9729 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
9730 if (CONST_INT_P (XEXP (x, 1))
9731 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9733 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9734 return true;
9737 return false;
9740 /* Not directly inside a set. If it involves the condition code
9741 register it must be the condition for a branch, cond_exec or
9742 I_T_E operation. Since the comparison is performed elsewhere
9743 this is just the control part which has no additional
9744 cost. */
9745 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9746 && XEXP (x, 1) == const0_rtx)
9748 *cost = 0;
9749 return true;
9752 case ABS:
9753 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9754 && (mode == SFmode || !TARGET_VFP_SINGLE))
9756 *cost = COSTS_N_INSNS (1);
9757 if (speed_p)
9758 *cost += extra_cost->fp[mode != SFmode].neg;
9760 return false;
9762 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9764 *cost = LIBCALL_COST (1);
9765 return false;
9768 if (mode == SImode)
9770 *cost = COSTS_N_INSNS (1);
9771 if (speed_p)
9772 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
9773 return false;
9775 /* Vector mode? */
9776 *cost = LIBCALL_COST (1);
9777 return false;
9779 case SIGN_EXTEND:
9780 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
9781 && MEM_P (XEXP (x, 0)))
9783 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
9785 if (mode == DImode)
9786 *cost += COSTS_N_INSNS (1);
9788 if (!speed_p)
9789 return true;
9791 if (GET_MODE (XEXP (x, 0)) == SImode)
9792 *cost += extra_cost->ldst.load;
9793 else
9794 *cost += extra_cost->ldst.load_sign_extend;
9796 if (mode == DImode)
9797 *cost += extra_cost->alu.shift;
9799 return true;
9802 /* Widening from less than 32-bits requires an extend operation. */
9803 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
9805 /* We have SXTB/SXTH. */
9806 *cost = COSTS_N_INSNS (1);
9807 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9808 if (speed_p)
9809 *cost += extra_cost->alu.extnd;
9811 else if (GET_MODE (XEXP (x, 0)) != SImode)
9813 /* Needs two shifts. */
9814 *cost = COSTS_N_INSNS (2);
9815 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9816 if (speed_p)
9817 *cost += 2 * extra_cost->alu.shift;
9820 /* Widening beyond 32-bits requires one more insn. */
9821 if (mode == DImode)
9823 *cost += COSTS_N_INSNS (1);
9824 if (speed_p)
9825 *cost += extra_cost->alu.shift;
9828 return true;
9830 case ZERO_EXTEND:
9831 if ((arm_arch4
9832 || GET_MODE (XEXP (x, 0)) == SImode
9833 || GET_MODE (XEXP (x, 0)) == QImode)
9834 && MEM_P (XEXP (x, 0)))
9836 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
9838 if (mode == DImode)
9839 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
9841 return true;
9844 /* Widening from less than 32-bits requires an extend operation. */
9845 if (GET_MODE (XEXP (x, 0)) == QImode)
9847 /* UXTB can be a shorter instruction in Thumb2, but it might
9848 be slower than the AND Rd, Rn, #255 alternative. When
9849 optimizing for speed it should never be slower to use
9850 AND, and we don't really model 16-bit vs 32-bit insns
9851 here. */
9852 *cost = COSTS_N_INSNS (1);
9853 if (speed_p)
9854 *cost += extra_cost->alu.logical;
9856 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
9858 /* We have UXTB/UXTH. */
9859 *cost = COSTS_N_INSNS (1);
9860 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9861 if (speed_p)
9862 *cost += extra_cost->alu.extnd;
9864 else if (GET_MODE (XEXP (x, 0)) != SImode)
9866 /* Needs two shifts. It's marginally preferable to use
9867 shifts rather than two BIC instructions as the second
9868 shift may merge with a subsequent insn as a shifter
9869 op. */
9870 *cost = COSTS_N_INSNS (2);
9871 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9872 if (speed_p)
9873 *cost += 2 * extra_cost->alu.shift;
9876 /* Widening beyond 32-bits requires one more insn. */
9877 if (mode == DImode)
9879 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
9882 return true;
9884 case CONST_INT:
9885 *cost = 0;
9886 /* CONST_INT has no mode, so we cannot tell for sure how many
9887 insns are really going to be needed. The best we can do is
9888 look at the value passed. If it fits in SImode, then assume
9889 that's the mode it will be used for. Otherwise assume it
9890 will be used in DImode. */
9891 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
9892 mode = SImode;
9893 else
9894 mode = DImode;
9896 /* Avoid blowing up in arm_gen_constant (). */
9897 if (!(outer_code == PLUS
9898 || outer_code == AND
9899 || outer_code == IOR
9900 || outer_code == XOR
9901 || outer_code == MINUS))
9902 outer_code = SET;
9904 const_int_cost:
9905 if (mode == SImode)
9907 *cost += 0;
9908 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
9909 INTVAL (x), NULL, NULL,
9910 0, 0));
9911 /* Extra costs? */
9913 else
9915 *cost += COSTS_N_INSNS (arm_gen_constant
9916 (outer_code, SImode, NULL,
9917 trunc_int_for_mode (INTVAL (x), SImode),
9918 NULL, NULL, 0, 0)
9919 + arm_gen_constant (outer_code, SImode, NULL,
9920 INTVAL (x) >> 32, NULL,
9921 NULL, 0, 0));
9922 /* Extra costs? */
9925 return true;
9927 case CONST:
9928 case LABEL_REF:
9929 case SYMBOL_REF:
9930 if (speed_p)
9932 if (arm_arch_thumb2 && !flag_pic)
9933 *cost = COSTS_N_INSNS (2);
9934 else
9935 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
9937 else
9938 *cost = COSTS_N_INSNS (2);
9940 if (flag_pic)
9942 *cost += COSTS_N_INSNS (1);
9943 if (speed_p)
9944 *cost += extra_cost->alu.arith;
9947 return true;
9949 case CONST_FIXED:
9950 *cost = COSTS_N_INSNS (4);
9951 /* Fixme. */
9952 return true;
9954 case CONST_DOUBLE:
9955 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9956 && (mode == SFmode || !TARGET_VFP_SINGLE))
9958 if (vfp3_const_double_rtx (x))
9960 *cost = COSTS_N_INSNS (1);
9961 if (speed_p)
9962 *cost += extra_cost->fp[mode == DFmode].fpconst;
9963 return true;
9966 if (speed_p)
9968 *cost = COSTS_N_INSNS (1);
9969 if (mode == DFmode)
9970 *cost += extra_cost->ldst.loadd;
9971 else
9972 *cost += extra_cost->ldst.loadf;
9974 else
9975 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
9977 return true;
9979 *cost = COSTS_N_INSNS (4);
9980 return true;
9982 case CONST_VECTOR:
9983 /* Fixme. */
9984 if (TARGET_NEON
9985 && TARGET_HARD_FLOAT
9986 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9987 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9988 *cost = COSTS_N_INSNS (1);
9989 else
9990 *cost = COSTS_N_INSNS (4);
9991 return true;
9993 case HIGH:
9994 case LO_SUM:
9995 *cost = COSTS_N_INSNS (1);
9996 /* When optimizing for size, we prefer constant pool entries to
9997 MOVW/MOVT pairs, so bump the cost of these slightly. */
9998 if (!speed_p)
9999 *cost += 1;
10000 return true;
10002 case CLZ:
10003 *cost = COSTS_N_INSNS (1);
10004 if (speed_p)
10005 *cost += extra_cost->alu.clz;
10006 return false;
10008 case SMIN:
10009 if (XEXP (x, 1) == const0_rtx)
10011 *cost = COSTS_N_INSNS (1);
10012 if (speed_p)
10013 *cost += extra_cost->alu.log_shift;
10014 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10015 return true;
10017 /* Fall through. */
10018 case SMAX:
10019 case UMIN:
10020 case UMAX:
10021 *cost = COSTS_N_INSNS (2);
10022 return false;
10024 case TRUNCATE:
10025 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10026 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10027 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10029 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10030 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10031 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10032 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10033 == ZERO_EXTEND))))
10035 *cost = COSTS_N_INSNS (1);
10036 if (speed_p)
10037 *cost += extra_cost->mult[1].extend;
10038 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10039 speed_p)
10040 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10041 0, speed_p));
10042 return true;
10044 *cost = LIBCALL_COST (1);
10045 return false;
10047 case UNSPEC:
10048 return arm_unspec_cost (x, outer_code, speed_p, cost);
10050 case PC:
10051 /* Reading the PC is like reading any other register. Writing it
10052 is more expensive, but we take that into account elsewhere. */
10053 *cost = 0;
10054 return true;
10056 case ZERO_EXTRACT:
10057 /* TODO: Simple zero_extract of bottom bits using AND. */
10058 /* Fall through. */
10059 case SIGN_EXTRACT:
10060 if (arm_arch6
10061 && mode == SImode
10062 && CONST_INT_P (XEXP (x, 1))
10063 && CONST_INT_P (XEXP (x, 2)))
10065 *cost = COSTS_N_INSNS (1);
10066 if (speed_p)
10067 *cost += extra_cost->alu.bfx;
10068 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10069 return true;
10071 /* Without UBFX/SBFX, need to resort to shift operations. */
10072 *cost = COSTS_N_INSNS (2);
10073 if (speed_p)
10074 *cost += 2 * extra_cost->alu.shift;
10075 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10076 return true;
10078 case FLOAT_EXTEND:
10079 if (TARGET_HARD_FLOAT)
10081 *cost = COSTS_N_INSNS (1);
10082 if (speed_p)
10083 *cost += extra_cost->fp[mode == DFmode].widen;
10084 if (!TARGET_FPU_ARMV8
10085 && GET_MODE (XEXP (x, 0)) == HFmode)
10087 /* Pre v8, widening HF->DF is a two-step process, first
10088 widening to SFmode. */
10089 *cost += COSTS_N_INSNS (1);
10090 if (speed_p)
10091 *cost += extra_cost->fp[0].widen;
10093 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10094 return true;
10097 *cost = LIBCALL_COST (1);
10098 return false;
10100 case FLOAT_TRUNCATE:
10101 if (TARGET_HARD_FLOAT)
10103 *cost = COSTS_N_INSNS (1);
10104 if (speed_p)
10105 *cost += extra_cost->fp[mode == DFmode].narrow;
10106 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10107 return true;
10108 /* Vector modes? */
10110 *cost = LIBCALL_COST (1);
10111 return false;
10113 case FIX:
10114 case UNSIGNED_FIX:
10115 if (TARGET_HARD_FLOAT)
10117 if (GET_MODE_CLASS (mode) == MODE_INT)
10119 *cost = COSTS_N_INSNS (1);
10120 if (speed_p)
10121 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10122 /* Strip off the 'cost' of rounding towards zero. */
10123 if (GET_CODE (XEXP (x, 0)) == FIX)
10124 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10125 else
10126 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10127 /* ??? Increase the cost to deal with transferring from
10128 FP -> CORE registers? */
10129 return true;
10131 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10132 && TARGET_FPU_ARMV8)
10134 *cost = COSTS_N_INSNS (1);
10135 if (speed_p)
10136 *cost += extra_cost->fp[mode == DFmode].roundint;
10137 return false;
10139 /* Vector costs? */
10141 *cost = LIBCALL_COST (1);
10142 return false;
10144 case FLOAT:
10145 case UNSIGNED_FLOAT:
10146 if (TARGET_HARD_FLOAT)
10148 /* ??? Increase the cost to deal with transferring from CORE
10149 -> FP registers? */
10150 *cost = COSTS_N_INSNS (1);
10151 if (speed_p)
10152 *cost += extra_cost->fp[mode == DFmode].fromint;
10153 return false;
10155 *cost = LIBCALL_COST (1);
10156 return false;
10158 case CALL:
10159 *cost = COSTS_N_INSNS (1);
10160 return true;
10162 case ASM_OPERANDS:
10163 /* Just a guess. Cost one insn per input. */
10164 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10165 return true;
10167 default:
10168 if (mode != VOIDmode)
10169 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10170 else
10171 *cost = COSTS_N_INSNS (4); /* Who knows? */
10172 return false;
10176 /* RTX costs. Dispatch to the appropriate cost routine for size or speed. */
10177 static bool
10178 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10179 int *total, bool speed)
10181 bool result;
10183 if (TARGET_OLD_RTX_COSTS
10184 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10186 /* Old way. (Deprecated.) */
10187 if (!speed)
10188 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10189 (enum rtx_code) outer_code, total);
10190 else
10191 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10192 (enum rtx_code) outer_code, total,
10193 speed);
10195 else
10197 /* New way. */
10198 if (current_tune->insn_extra_cost)
10199 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10200 (enum rtx_code) outer_code,
10201 current_tune->insn_extra_cost,
10202 total, speed);
10203 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10204 && current_tune->insn_extra_cost == NULL */
10205 else
10206 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10207 (enum rtx_code) outer_code,
10208 &generic_extra_costs, total, speed);
10211 if (dump_file && (dump_flags & TDF_DETAILS))
10213 print_rtl_single (dump_file, x);
10214 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10215 *total, result ? "final" : "partial");
10217 return result;
10220 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10221 supported on any "slowmul" cores, so it can be ignored. */
10223 static bool
10224 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10225 int *total, bool speed)
10227 enum machine_mode mode = GET_MODE (x);
10229 if (TARGET_THUMB)
10231 *total = thumb1_rtx_costs (x, code, outer_code);
10232 return true;
10235 switch (code)
10237 case MULT:
10238 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10239 || mode == DImode)
10241 *total = COSTS_N_INSNS (20);
10242 return false;
10245 if (CONST_INT_P (XEXP (x, 1)))
10247 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10248 & (unsigned HOST_WIDE_INT) 0xffffffff);
10249 int cost, const_ok = const_ok_for_arm (i);
10250 int j, booth_unit_size;
10252 /* Tune as appropriate. */
10253 cost = const_ok ? 4 : 8;
10254 booth_unit_size = 2;
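/* The loop below adds one to the cost estimate for each 2-bit Booth
   step needed before the remaining multiplier bits are all zero (at
   most 16 steps); for example, a constant of 0xff adds four on top
   of the base cost.  */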
10255 for (j = 0; i && j < 32; j += booth_unit_size)
10257 i >>= booth_unit_size;
10258 cost++;
10261 *total = COSTS_N_INSNS (cost);
10262 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10263 return true;
10266 *total = COSTS_N_INSNS (20);
10267 return false;
10269 default:
10270 return arm_rtx_costs_1 (x, outer_code, total, speed);
10275 /* RTX cost for cores with a fast multiply unit (M variants). */
10277 static bool
10278 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10279 int *total, bool speed)
10281 enum machine_mode mode = GET_MODE (x);
10283 if (TARGET_THUMB1)
10285 *total = thumb1_rtx_costs (x, code, outer_code);
10286 return true;
10289 /* ??? should thumb2 use different costs? */
10290 switch (code)
10292 case MULT:
10293 /* There is no point basing this on the tuning, since it is always the
10294 fast variant if it exists at all. */
10295 if (mode == DImode
10296 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10297 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10298 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10300 *total = COSTS_N_INSNS (2);
10301 return false;
10305 if (mode == DImode)
10307 *total = COSTS_N_INSNS (5);
10308 return false;
10311 if (CONST_INT_P (XEXP (x, 1)))
10313 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10314 & (unsigned HOST_WIDE_INT) 0xffffffff);
10315 int cost, const_ok = const_ok_for_arm (i);
10316 int j, booth_unit_size;
10318 /* Tune as appropriate. */
10319 cost = const_ok ? 4 : 8;
10320 booth_unit_size = 8;
10321 for (j = 0; i && j < 32; j += booth_unit_size)
10323 i >>= booth_unit_size;
10324 cost++;
10327 *total = COSTS_N_INSNS (cost);
10328 return false;
10331 if (mode == SImode)
10333 *total = COSTS_N_INSNS (4);
10334 return false;
10337 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10339 if (TARGET_HARD_FLOAT
10340 && (mode == SFmode
10341 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10343 *total = COSTS_N_INSNS (1);
10344 return false;
10348 /* Requires a lib call */
10349 *total = COSTS_N_INSNS (20);
10350 return false;
10352 default:
10353 return arm_rtx_costs_1 (x, outer_code, total, speed);
10358 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10359 so it can be ignored. */
10361 static bool
10362 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10363 int *total, bool speed)
10365 enum machine_mode mode = GET_MODE (x);
10367 if (TARGET_THUMB)
10369 *total = thumb1_rtx_costs (x, code, outer_code);
10370 return true;
10373 switch (code)
10375 case COMPARE:
10376 if (GET_CODE (XEXP (x, 0)) != MULT)
10377 return arm_rtx_costs_1 (x, outer_code, total, speed);
10379 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10380 will stall until the multiplication is complete. */
10381 *total = COSTS_N_INSNS (3);
10382 return false;
10384 case MULT:
10385 /* There is no point basing this on the tuning, since it is always the
10386 fast variant if it exists at all. */
10387 if (mode == DImode
10388 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10389 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10390 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10392 *total = COSTS_N_INSNS (2);
10393 return false;
10397 if (mode == DImode)
10399 *total = COSTS_N_INSNS (5);
10400 return false;
10403 if (CONST_INT_P (XEXP (x, 1)))
10405 /* If operand 1 is a constant we can more accurately
10406 calculate the cost of the multiply. The multiplier can
10407 retire 15 bits on the first cycle and a further 12 on the
10408 second. We do, of course, have to load the constant into
10409 a register first. */
10410 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10411 /* There's a general overhead of one cycle. */
10412 int cost = 1;
10413 unsigned HOST_WIDE_INT masked_const;
10415 if (i & 0x80000000)
10416 i = ~i;
10418 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
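/* The two masks below correspond to the 15- and 12-bit chunks
   described above: any bits set at or above bit 15 force a second
   cycle, and any bits set at or above bit 27 force a third.  */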
10420 masked_const = i & 0xffff8000;
10421 if (masked_const != 0)
10423 cost++;
10424 masked_const = i & 0xf8000000;
10425 if (masked_const != 0)
10426 cost++;
10428 *total = COSTS_N_INSNS (cost);
10429 return false;
10432 if (mode == SImode)
10434 *total = COSTS_N_INSNS (3);
10435 return false;
10438 /* Requires a lib call */
10439 *total = COSTS_N_INSNS (20);
10440 return false;
10442 default:
10443 return arm_rtx_costs_1 (x, outer_code, total, speed);
10448 /* RTX costs for 9e (and later) cores. */
10450 static bool
10451 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10452 int *total, bool speed)
10454 enum machine_mode mode = GET_MODE (x);
10456 if (TARGET_THUMB1)
10458 switch (code)
10460 case MULT:
10461 *total = COSTS_N_INSNS (3);
10462 return true;
10464 default:
10465 *total = thumb1_rtx_costs (x, code, outer_code);
10466 return true;
10470 switch (code)
10472 case MULT:
10473 /* There is no point basing this on the tuning, since it is always the
10474 fast variant if it exists at all. */
10475 if (mode == DImode
10476 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10477 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10478 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10480 *total = COSTS_N_INSNS (2);
10481 return false;
10485 if (mode == DImode)
10487 *total = COSTS_N_INSNS (5);
10488 return false;
10491 if (mode == SImode)
10493 *total = COSTS_N_INSNS (2);
10494 return false;
10497 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10499 if (TARGET_HARD_FLOAT
10500 && (mode == SFmode
10501 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10503 *total = COSTS_N_INSNS (1);
10504 return false;
10508 *total = COSTS_N_INSNS (20);
10509 return false;
10511 default:
10512 return arm_rtx_costs_1 (x, outer_code, total, speed);
10515 /* All address computations that can be done are free, but rtx cost returns
10516 the same for practically all of them. So we weight the different types
10517 of address here in the order (most pref first):
10518 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10519 static inline int
10520 arm_arm_address_cost (rtx x)
10522 enum rtx_code c = GET_CODE (x);
10524 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10525 return 0;
10526 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10527 return 10;
10529 if (c == PLUS)
10531 if (CONST_INT_P (XEXP (x, 1)))
10532 return 2;
10534 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10535 return 3;
10537 return 4;
10540 return 6;
10543 static inline int
10544 arm_thumb_address_cost (rtx x)
10546 enum rtx_code c = GET_CODE (x);
10548 if (c == REG)
10549 return 1;
10550 if (c == PLUS
10551 && REG_P (XEXP (x, 0))
10552 && CONST_INT_P (XEXP (x, 1)))
10553 return 1;
10555 return 2;
10558 static int
10559 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
10560 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10562 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10565 /* Adjust cost hook for XScale. */
10566 static bool
10567 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10569 /* Some true dependencies can have a higher cost depending
10570 on precisely how certain input operands are used. */
10571 if (REG_NOTE_KIND(link) == 0
10572 && recog_memoized (insn) >= 0
10573 && recog_memoized (dep) >= 0)
10575 int shift_opnum = get_attr_shift (insn);
10576 enum attr_type attr_type = get_attr_type (dep);
10578 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10579 operand for INSN. If we have a shifted input operand and the
10580 instruction we depend on is another ALU instruction, then we may
10581 have to account for an additional stall. */
10582 if (shift_opnum != 0
10583 && (attr_type == TYPE_ALU_SHIFT_IMM
10584 || attr_type == TYPE_ALUS_SHIFT_IMM
10585 || attr_type == TYPE_LOGIC_SHIFT_IMM
10586 || attr_type == TYPE_LOGICS_SHIFT_IMM
10587 || attr_type == TYPE_ALU_SHIFT_REG
10588 || attr_type == TYPE_ALUS_SHIFT_REG
10589 || attr_type == TYPE_LOGIC_SHIFT_REG
10590 || attr_type == TYPE_LOGICS_SHIFT_REG
10591 || attr_type == TYPE_MOV_SHIFT
10592 || attr_type == TYPE_MVN_SHIFT
10593 || attr_type == TYPE_MOV_SHIFT_REG
10594 || attr_type == TYPE_MVN_SHIFT_REG))
10596 rtx shifted_operand;
10597 int opno;
10599 /* Get the shifted operand. */
10600 extract_insn (insn);
10601 shifted_operand = recog_data.operand[shift_opnum];
10603 /* Iterate over all the operands in DEP. If we write an operand
10604 that overlaps with SHIFTED_OPERAND, then we have to increase the
10605 cost of this dependency. */
10606 extract_insn (dep);
10607 preprocess_constraints ();
10608 for (opno = 0; opno < recog_data.n_operands; opno++)
10610 /* We can ignore strict inputs. */
10611 if (recog_data.operand_type[opno] == OP_IN)
10612 continue;
10614 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10615 shifted_operand))
10617 *cost = 2;
10618 return false;
10623 return true;
10626 /* Adjust cost hook for Cortex A9. */
10627 static bool
10628 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10630 switch (REG_NOTE_KIND (link))
10632 case REG_DEP_ANTI:
10633 *cost = 0;
10634 return false;
10636 case REG_DEP_TRUE:
10637 case REG_DEP_OUTPUT:
10638 if (recog_memoized (insn) >= 0
10639 && recog_memoized (dep) >= 0)
10641 if (GET_CODE (PATTERN (insn)) == SET)
10643 if (GET_MODE_CLASS
10644 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10645 || GET_MODE_CLASS
10646 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10648 enum attr_type attr_type_insn = get_attr_type (insn);
10649 enum attr_type attr_type_dep = get_attr_type (dep);
10651 /* By default all dependencies of the form
10652 s0 = s0 <op> s1
10653 s0 = s0 <op> s2
10654 have an extra latency of 1 cycle because
10655 of the input and output dependency in this
10656 case. However, this gets modeled as a true
10657 dependency and hence all these checks. */
10658 if (REG_P (SET_DEST (PATTERN (insn)))
10659 && REG_P (SET_DEST (PATTERN (dep)))
10660 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
10661 SET_DEST (PATTERN (dep))))
10663 /* FMACS is a special case where the dependent
10664 instruction can be issued 3 cycles before
10665 the normal latency in case of an output
10666 dependency. */
10667 if ((attr_type_insn == TYPE_FMACS
10668 || attr_type_insn == TYPE_FMACD)
10669 && (attr_type_dep == TYPE_FMACS
10670 || attr_type_dep == TYPE_FMACD))
10672 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
10673 *cost = insn_default_latency (dep) - 3;
10674 else
10675 *cost = insn_default_latency (dep);
10676 return false;
10678 else
10680 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
10681 *cost = insn_default_latency (dep) + 1;
10682 else
10683 *cost = insn_default_latency (dep);
10685 return false;
10690 break;
10692 default:
10693 gcc_unreachable ();
10696 return true;
10699 /* Adjust cost hook for FA726TE. */
10700 static bool
10701 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10703 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
10704 has a penalty of 3. */
10705 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
10706 && recog_memoized (insn) >= 0
10707 && recog_memoized (dep) >= 0
10708 && get_attr_conds (dep) == CONDS_SET)
10710 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10711 if (get_attr_conds (insn) == CONDS_USE
10712 && get_attr_type (insn) != TYPE_BRANCH)
10714 *cost = 3;
10715 return false;
10718 if (GET_CODE (PATTERN (insn)) == COND_EXEC
10719 || get_attr_conds (insn) == CONDS_USE)
10721 *cost = 0;
10722 return false;
10726 return true;
10729 /* Implement TARGET_REGISTER_MOVE_COST.
10731 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10732 such a move is typically more expensive than a single memory access. We set
10733 the cost to less than two memory accesses so that floating
10734 point to integer conversion does not go through memory. */
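/* For example, the VFP<->general-register cost of 15 below is less than
   twice the TARGET_32BIT memory move cost of 10 returned by
   arm_memory_move_cost, which is what keeps floating point to integer
   conversions from going through memory.  */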
10737 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10738 reg_class_t from, reg_class_t to)
10740 if (TARGET_32BIT)
10742 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
10743 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
10744 return 15;
10745 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
10746 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
10747 return 4;
10748 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
10749 return 20;
10750 else
10751 return 2;
10753 else
10755 if (from == HI_REGS || to == HI_REGS)
10756 return 4;
10757 else
10758 return 2;
10762 /* Implement TARGET_MEMORY_MOVE_COST. */
10765 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
10766 bool in ATTRIBUTE_UNUSED)
10768 if (TARGET_32BIT)
10769 return 10;
10770 else
10772 if (GET_MODE_SIZE (mode) < 4)
10773 return 8;
10774 else
10775 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
10779 /* Vectorizer cost model implementation. */
10781 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10782 static int
10783 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10784 tree vectype,
10785 int misalign ATTRIBUTE_UNUSED)
10787 unsigned elements;
10789 switch (type_of_cost)
10791 case scalar_stmt:
10792 return current_tune->vec_costs->scalar_stmt_cost;
10794 case scalar_load:
10795 return current_tune->vec_costs->scalar_load_cost;
10797 case scalar_store:
10798 return current_tune->vec_costs->scalar_store_cost;
10800 case vector_stmt:
10801 return current_tune->vec_costs->vec_stmt_cost;
10803 case vector_load:
10804 return current_tune->vec_costs->vec_align_load_cost;
10806 case vector_store:
10807 return current_tune->vec_costs->vec_store_cost;
10809 case vec_to_scalar:
10810 return current_tune->vec_costs->vec_to_scalar_cost;
10812 case scalar_to_vec:
10813 return current_tune->vec_costs->scalar_to_vec_cost;
10815 case unaligned_load:
10816 return current_tune->vec_costs->vec_unalign_load_cost;
10818 case unaligned_store:
10819 return current_tune->vec_costs->vec_unalign_store_cost;
10821 case cond_branch_taken:
10822 return current_tune->vec_costs->cond_taken_branch_cost;
10824 case cond_branch_not_taken:
10825 return current_tune->vec_costs->cond_not_taken_branch_cost;
10827 case vec_perm:
10828 case vec_promote_demote:
10829 return current_tune->vec_costs->vec_stmt_cost;
10831 case vec_construct:
10832 elements = TYPE_VECTOR_SUBPARTS (vectype);
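/* Roughly one cost unit per pair of elements, plus one; for example,
   a four-element vector costs 3.  */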
10833 return elements / 2 + 1;
10835 default:
10836 gcc_unreachable ();
10840 /* Implement targetm.vectorize.add_stmt_cost. */
10842 static unsigned
10843 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10844 struct _stmt_vec_info *stmt_info, int misalign,
10845 enum vect_cost_model_location where)
10847 unsigned *cost = (unsigned *) data;
10848 unsigned retval = 0;
10850 if (flag_vect_cost_model)
10852 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10853 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
10855 /* Statements in an inner loop relative to the loop being
10856 vectorized are weighted more heavily. The value here is
10857 arbitrary and could potentially be improved with analysis. */
10858 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
10859 count *= 50; /* FIXME. */
10861 retval = (unsigned) (count * stmt_cost);
10862 cost[where] += retval;
10865 return retval;
10868 /* Return true if and only if this insn can dual-issue only as older. */
10869 static bool
10870 cortexa7_older_only (rtx insn)
10872 if (recog_memoized (insn) < 0)
10873 return false;
10875 switch (get_attr_type (insn))
10877 case TYPE_ALU_REG:
10878 case TYPE_ALUS_REG:
10879 case TYPE_LOGIC_REG:
10880 case TYPE_LOGICS_REG:
10881 case TYPE_ADC_REG:
10882 case TYPE_ADCS_REG:
10883 case TYPE_ADR:
10884 case TYPE_BFM:
10885 case TYPE_REV:
10886 case TYPE_MVN_REG:
10887 case TYPE_SHIFT_IMM:
10888 case TYPE_SHIFT_REG:
10889 case TYPE_LOAD_BYTE:
10890 case TYPE_LOAD1:
10891 case TYPE_STORE1:
10892 case TYPE_FFARITHS:
10893 case TYPE_FADDS:
10894 case TYPE_FFARITHD:
10895 case TYPE_FADDD:
10896 case TYPE_FMOV:
10897 case TYPE_F_CVT:
10898 case TYPE_FCMPS:
10899 case TYPE_FCMPD:
10900 case TYPE_FCONSTS:
10901 case TYPE_FCONSTD:
10902 case TYPE_FMULS:
10903 case TYPE_FMACS:
10904 case TYPE_FMULD:
10905 case TYPE_FMACD:
10906 case TYPE_FDIVS:
10907 case TYPE_FDIVD:
10908 case TYPE_F_MRC:
10909 case TYPE_F_MRRC:
10910 case TYPE_F_FLAG:
10911 case TYPE_F_LOADS:
10912 case TYPE_F_STORES:
10913 return true;
10914 default:
10915 return false;
10919 /* Return true if and only if this insn can dual-issue as younger. */
10920 static bool
10921 cortexa7_younger (FILE *file, int verbose, rtx insn)
10923 if (recog_memoized (insn) < 0)
10925 if (verbose > 5)
10926 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
10927 return false;
10930 switch (get_attr_type (insn))
10932 case TYPE_ALU_IMM:
10933 case TYPE_ALUS_IMM:
10934 case TYPE_LOGIC_IMM:
10935 case TYPE_LOGICS_IMM:
10936 case TYPE_EXTEND:
10937 case TYPE_MVN_IMM:
10938 case TYPE_MOV_IMM:
10939 case TYPE_MOV_REG:
10940 case TYPE_MOV_SHIFT:
10941 case TYPE_MOV_SHIFT_REG:
10942 case TYPE_BRANCH:
10943 case TYPE_CALL:
10944 return true;
10945 default:
10946 return false;
10951 /* Look for an instruction that can dual issue only as an older
10952 instruction, and move it in front of any instructions that can
10953 dual-issue as younger, while preserving the relative order of all
10954 other instructions in the ready list. This is a heuristic to help
10955 dual-issue in later cycles, by postponing issue of more flexible
10956 instructions. This heuristic may affect dual issue opportunities
10957 in the current cycle. */
10958 static void
10959 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
10960 int clock)
10962 int i;
10963 int first_older_only = -1, first_younger = -1;
10965 if (verbose > 5)
10966 fprintf (file,
10967 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10968 clock,
10969 *n_readyp);
10971 /* Traverse the ready list from the head (the instruction to issue
10972 first), looking for the first instruction that can issue as
10973 younger and the first instruction that can dual-issue only as
10974 older. */
10975 for (i = *n_readyp - 1; i >= 0; i--)
10977 rtx insn = ready[i];
10978 if (cortexa7_older_only (insn))
10980 first_older_only = i;
10981 if (verbose > 5)
10982 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
10983 break;
10985 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
10986 first_younger = i;
10989 /* Nothing to reorder because either no younger insn found or insn
10990 that can dual-issue only as older appears before any insn that
10991 can dual-issue as younger. */
10992 if (first_younger == -1)
10994 if (verbose > 5)
10995 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
10996 return;
10999 /* Nothing to reorder because no older-only insn in the ready list. */
11000 if (first_older_only == -1)
11002 if (verbose > 5)
11003 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11004 return;
11007 /* Move first_older_only insn before first_younger. */
11008 if (verbose > 5)
11009 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11010 INSN_UID (ready[first_older_only]),
11011 INSN_UID (ready[first_younger]));
11012 rtx first_older_only_insn = ready [first_older_only];
11013 for (i = first_older_only; i < first_younger; i++)
11015 ready[i] = ready[i+1];
11018 ready[i] = first_older_only_insn;
11019 return;
11022 /* Implement TARGET_SCHED_REORDER. */
11023 static int
11024 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11025 int clock)
11027 switch (arm_tune)
11029 case cortexa7:
11030 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11031 break;
11032 default:
11033 /* Do nothing for other cores. */
11034 break;
11037 return arm_issue_rate ();
11040 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11041 It corrects the value of COST based on the relationship between
11042 INSN and DEP through the dependence LINK. It returns the new
11043 value. There is a per-core adjust_cost hook to adjust scheduler costs
11044 and the per-core hook can choose to completely override the generic
11045 adjust_cost function. Only put bits of code into arm_adjust_cost that
11046 are common across all cores. */
11047 static int
11048 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11050 rtx i_pat, d_pat;
11052 /* When generating Thumb-1 code, we want to place flag-setting operations
11053 close to a conditional branch which depends on them, so that we can
11054 omit the comparison. */
11055 if (TARGET_THUMB1
11056 && REG_NOTE_KIND (link) == 0
11057 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11058 && recog_memoized (dep) >= 0
11059 && get_attr_conds (dep) == CONDS_SET)
11060 return 0;
11062 if (current_tune->sched_adjust_cost != NULL)
11064 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11065 return cost;
11068 /* XXX Is this strictly true? */
11069 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11070 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11071 return 0;
11073 /* Call insns don't incur a stall, even if they follow a load. */
11074 if (REG_NOTE_KIND (link) == 0
11075 && CALL_P (insn))
11076 return 1;
11078 if ((i_pat = single_set (insn)) != NULL
11079 && MEM_P (SET_SRC (i_pat))
11080 && (d_pat = single_set (dep)) != NULL
11081 && MEM_P (SET_DEST (d_pat)))
11083 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11084 /* This is a load after a store; there is no conflict if the load reads
11085 from a cached area. Assume that loads from the stack, and from the
11086 constant pool are cached, and that others will miss. This is a
11087 hack. */
11089 if ((GET_CODE (src_mem) == SYMBOL_REF
11090 && CONSTANT_POOL_ADDRESS_P (src_mem))
11091 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11092 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11093 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11094 return 1;
11097 return cost;
11101 arm_max_conditional_execute (void)
11103 return max_insns_skipped;
11106 static int
11107 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11109 if (TARGET_32BIT)
11110 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11111 else
11112 return (optimize > 0) ? 2 : 0;
11115 static int
11116 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11118 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11121 static bool fp_consts_inited = false;
11123 static REAL_VALUE_TYPE value_fp0;
11125 static void
11126 init_fp_table (void)
11128 REAL_VALUE_TYPE r;
11130 r = REAL_VALUE_ATOF ("0", DFmode);
11131 value_fp0 = r;
11132 fp_consts_inited = true;
11135 /* Return TRUE if rtx X is a valid immediate FP constant. */
11137 arm_const_double_rtx (rtx x)
11139 REAL_VALUE_TYPE r;
11141 if (!fp_consts_inited)
11142 init_fp_table ();
11144 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11145 if (REAL_VALUE_MINUS_ZERO (r))
11146 return 0;
11148 if (REAL_VALUES_EQUAL (r, value_fp0))
11149 return 1;
11151 return 0;
11154 /* VFPv3 has a fairly wide range of representable immediates, formed from
11155 "quarter-precision" floating-point values. These can be evaluated using this
11156 formula (with ^ for exponentiation):
11158 -1^s * n * 2^-r
11160 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11161 16 <= n <= 31 and 0 <= r <= 7.
11163 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11165 - A (most-significant) is the sign bit.
11166 - BCD are the exponent (encoded as r XOR 3).
11167 - EFGH are the mantissa (encoded as n - 16).
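   As a worked example, 1.0 = 16 * 2^-4 (s = 0, n = 16, r = 4), so
   A = 0, BCD = 4 XOR 3 = 0b111 and EFGH = 16 - 16 = 0b0000, giving
   the 8-bit pattern 0b01110000 (0x70).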
11170 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11171 fconst[sd] instruction, or -1 if X isn't suitable. */
11172 static int
11173 vfp3_const_double_index (rtx x)
11175 REAL_VALUE_TYPE r, m;
11176 int sign, exponent;
11177 unsigned HOST_WIDE_INT mantissa, mant_hi;
11178 unsigned HOST_WIDE_INT mask;
11179 HOST_WIDE_INT m1, m2;
11180 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11182 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11183 return -1;
11185 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11187 /* We can't represent these things, so detect them first. */
11188 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11189 return -1;
11191 /* Extract sign, exponent and mantissa. */
11192 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11193 r = real_value_abs (&r);
11194 exponent = REAL_EXP (&r);
11195 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11196 highest (sign) bit, with a fixed binary point at bit point_pos.
11197 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11198 bits for the mantissa, this may fail (low bits would be lost). */
11199 real_ldexp (&m, &r, point_pos - exponent);
11200 REAL_VALUE_TO_INT (&m1, &m2, m);
11201 mantissa = m1;
11202 mant_hi = m2;
11204 /* If there are bits set in the low part of the mantissa, we can't
11205 represent this value. */
11206 if (mantissa != 0)
11207 return -1;
11209 /* Now make it so that mantissa contains the most-significant bits, and move
11210 the point_pos to indicate that the least-significant bits have been
11211 discarded. */
11212 point_pos -= HOST_BITS_PER_WIDE_INT;
11213 mantissa = mant_hi;
11215 /* We can permit four significant bits of mantissa only, plus a high bit
11216 which is always 1. */
11217 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11218 if ((mantissa & mask) != 0)
11219 return -1;
11221 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11222 mantissa >>= point_pos - 5;
11224 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11225 floating-point immediate zero with Neon using an integer-zero load, but
11226 that case is handled elsewhere.) */
11227 if (mantissa == 0)
11228 return -1;
11230 gcc_assert (mantissa >= 16 && mantissa <= 31);
11232 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11233 normalized significands are in the range [1, 2). (Our mantissa is shifted
11234 left 4 places at this point relative to normalized IEEE754 values). GCC
11235 internally uses [0.5, 1) (see real.c), so the exponent returned from
11236 REAL_EXP must be altered. */
11237 exponent = 5 - exponent;
11239 if (exponent < 0 || exponent > 7)
11240 return -1;
11242 /* Sign, mantissa and exponent are now in the correct form to plug into the
11243 formula described in the comment above. */
11244 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11247 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11249 vfp3_const_double_rtx (rtx x)
11251 if (!TARGET_VFP3)
11252 return 0;
11254 return vfp3_const_double_index (x) != -1;
11257 /* Recognize immediates which can be used in various Neon instructions. Legal
11258 immediates are described by the following table (for VMVN variants, the
11259 bitwise inverse of the constant shown is recognized. In either case, VMOV
11260 is output and the correct instruction to use for a given constant is chosen
11261 by the assembler). The constant shown is replicated across all elements of
11262 the destination vector.
11264 insn elems variant constant (binary)
11265 ---- ----- ------- -----------------
11266 vmov i32 0 00000000 00000000 00000000 abcdefgh
11267 vmov i32 1 00000000 00000000 abcdefgh 00000000
11268 vmov i32 2 00000000 abcdefgh 00000000 00000000
11269 vmov i32 3 abcdefgh 00000000 00000000 00000000
11270 vmov i16 4 00000000 abcdefgh
11271 vmov i16 5 abcdefgh 00000000
11272 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11273 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11274 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11275 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11276 vmvn i16 10 00000000 abcdefgh
11277 vmvn i16 11 abcdefgh 00000000
11278 vmov i32 12 00000000 00000000 abcdefgh 11111111
11279 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11280 vmov i32 14 00000000 abcdefgh 11111111 11111111
11281 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11282 vmov i8 16 abcdefgh
11283 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11284 eeeeeeee ffffffff gggggggg hhhhhhhh
11285 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11286 vmov f32 19 00000000 00000000 00000000 00000000
11288 For case 18, B = !b. Representable values are exactly those accepted by
11289 vfp3_const_double_index, but are output as floating-point numbers rather
11290 than indices.
11292 For case 19, we will change it to vmov.i32 when assembling.
11294 Variants 0-5 (inclusive) may also be used as immediates for the second
11295 operand of VORR/VBIC instructions.
11297 The INVERSE argument causes the bitwise inverse of the given operand to be
11298 recognized instead (used for recognizing legal immediates for the VAND/VORN
11299 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11300 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11301 output, rather than the real insns vbic/vorr).
11303 INVERSE makes no difference to the recognition of float vectors.
11305 The return value is the variant of immediate as shown in the above table, or
11306 -1 if the given value doesn't match any of the listed patterns.
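   As a worked example, a V4SImode vector with every element equal to
   0x000000ab matches variant 0 above (vmov.i32 with an 8-bit immediate),
   so the return value is 0 and *ELEMENTWIDTH is set to 32.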
11308 static int
11309 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11310 rtx *modconst, int *elementwidth)
11312 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11313 matches = 1; \
11314 for (i = 0; i < idx; i += (STRIDE)) \
11315 if (!(TEST)) \
11316 matches = 0; \
11317 if (matches) \
11319 immtype = (CLASS); \
11320 elsize = (ELSIZE); \
11321 break; \
11324 unsigned int i, elsize = 0, idx = 0, n_elts;
11325 unsigned int innersize;
11326 unsigned char bytes[16];
11327 int immtype = -1, matches;
11328 unsigned int invmask = inverse ? 0xff : 0;
11329 bool vector = GET_CODE (op) == CONST_VECTOR;
11331 if (vector)
11333 n_elts = CONST_VECTOR_NUNITS (op);
11334 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11336 else
11338 n_elts = 1;
11339 if (mode == VOIDmode)
11340 mode = DImode;
11341 innersize = GET_MODE_SIZE (mode);
11344 /* Vectors of float constants. */
11345 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11347 rtx el0 = CONST_VECTOR_ELT (op, 0);
11348 REAL_VALUE_TYPE r0;
11350 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11351 return -1;
11353 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11355 for (i = 1; i < n_elts; i++)
11357 rtx elt = CONST_VECTOR_ELT (op, i);
11358 REAL_VALUE_TYPE re;
11360 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11362 if (!REAL_VALUES_EQUAL (r0, re))
11363 return -1;
11366 if (modconst)
11367 *modconst = CONST_VECTOR_ELT (op, 0);
11369 if (elementwidth)
11370 *elementwidth = 0;
11372 if (el0 == CONST0_RTX (GET_MODE (el0)))
11373 return 19;
11374 else
11375 return 18;
11378 /* Splat vector constant out into a byte vector. */
11379 for (i = 0; i < n_elts; i++)
11381 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11382 unsigned HOST_WIDE_INT elpart;
11383 unsigned int part, parts;
11385 if (CONST_INT_P (el))
11387 elpart = INTVAL (el);
11388 parts = 1;
11390 else if (CONST_DOUBLE_P (el))
11392 elpart = CONST_DOUBLE_LOW (el);
11393 parts = 2;
11395 else
11396 gcc_unreachable ();
11398 for (part = 0; part < parts; part++)
11400 unsigned int byte;
11401 for (byte = 0; byte < innersize; byte++)
11403 bytes[idx++] = (elpart & 0xff) ^ invmask;
11404 elpart >>= BITS_PER_UNIT;
11406 if (CONST_DOUBLE_P (el))
11407 elpart = CONST_DOUBLE_HIGH (el);
11411 /* Sanity check. */
11412 gcc_assert (idx == GET_MODE_SIZE (mode));
11416 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11417 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11419 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11420 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11422 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11423 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11425 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11426 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11428 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11430 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11432 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11433 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11435 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11436 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11438 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11439 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11441 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11442 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11444 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11446 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11448 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11449 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11451 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11452 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11454 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11455 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11457 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11458 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11460 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11462 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11463 && bytes[i] == bytes[(i + 8) % idx]);
11465 while (0);
11467 if (immtype == -1)
11468 return -1;
11470 if (elementwidth)
11471 *elementwidth = elsize;
11473 if (modconst)
11475 unsigned HOST_WIDE_INT imm = 0;
11477 /* Un-invert bytes of recognized vector, if necessary. */
11478 if (invmask != 0)
11479 for (i = 0; i < idx; i++)
11480 bytes[i] ^= invmask;
11482 if (immtype == 17)
11484 /* FIXME: Broken on 32-bit H_W_I hosts. */
11485 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11487 for (i = 0; i < 8; i++)
11488 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11489 << (i * BITS_PER_UNIT);
11491 *modconst = GEN_INT (imm);
11493 else
11495 unsigned HOST_WIDE_INT imm = 0;
11497 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11498 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11500 *modconst = GEN_INT (imm);
11504 return immtype;
11505 #undef CHECK
11508 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11509 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11510 float elements), and a modified constant (whatever should be output for a
11511 VMOV) in *MODCONST. */
11514 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
11515 rtx *modconst, int *elementwidth)
11517 rtx tmpconst;
11518 int tmpwidth;
11519 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11521 if (retval == -1)
11522 return 0;
11524 if (modconst)
11525 *modconst = tmpconst;
11527 if (elementwidth)
11528 *elementwidth = tmpwidth;
11530 return 1;
11533 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11534 the immediate is valid, write a constant suitable for using as an operand
11535 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11536 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11539 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
11540 rtx *modconst, int *elementwidth)
11542 rtx tmpconst;
11543 int tmpwidth;
11544 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11546 if (retval < 0 || retval > 5)
11547 return 0;
11549 if (modconst)
11550 *modconst = tmpconst;
11552 if (elementwidth)
11553 *elementwidth = tmpwidth;
11555 return 1;
11558 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11559 the immediate is valid, write a constant suitable for using as an operand
11560 to VSHR/VSHL to *MODCONST and the corresponding element width to
11561 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right shift,
11562 because the two have different limitations. */
11565 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
11566 rtx *modconst, int *elementwidth,
11567 bool isleftshift)
11569 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11570 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11571 unsigned HOST_WIDE_INT last_elt = 0;
11572 unsigned HOST_WIDE_INT maxshift;
11574 /* Split vector constant out into a byte vector. */
11575 for (i = 0; i < n_elts; i++)
11577 rtx el = CONST_VECTOR_ELT (op, i);
11578 unsigned HOST_WIDE_INT elpart;
11580 if (CONST_INT_P (el))
11581 elpart = INTVAL (el);
11582 else if (CONST_DOUBLE_P (el))
11583 return 0;
11584 else
11585 gcc_unreachable ();
11587 if (i != 0 && elpart != last_elt)
11588 return 0;
11590 last_elt = elpart;
11593 /* Shift less than element size. */
11594 maxshift = innersize * 8;
11596 if (isleftshift)
11598 /* Left shift immediate value can be from 0 to <size>-1. */
11599 if (last_elt >= maxshift)
11600 return 0;
11602 else
11604 /* Right shift immediate value can be from 1 to <size>. */
11605 if (last_elt == 0 || last_elt > maxshift)
11606 return 0;
11609 if (elementwidth)
11610 *elementwidth = innersize * 8;
11612 if (modconst)
11613 *modconst = CONST_VECTOR_ELT (op, 0);
11615 return 1;
11618 /* Return a string suitable for output of Neon immediate logic operation
11619 MNEM. */
11621 char *
11622 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
11623 int inverse, int quad)
11625 int width, is_valid;
11626 static char templ[40];
11628 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11630 gcc_assert (is_valid != 0);
11632 if (quad)
11633 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11634 else
11635 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11637 return templ;
11640 /* Return a string suitable for output of Neon immediate shift operation
11641 (VSHR or VSHL) MNEM. */
11643 char *
11644 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11645 enum machine_mode mode, int quad,
11646 bool isleftshift)
11648 int width, is_valid;
11649 static char templ[40];
11651 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11652 gcc_assert (is_valid != 0);
11654 if (quad)
11655 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11656 else
11657 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11659 return templ;
11662 /* Output a sequence of pairwise operations to implement a reduction.
11663 NOTE: We do "too much work" here, because pairwise operations work on two
11664 registers' worth of operands in one go. Unfortunately, I don't think we can
11665 exploit those extra calculations to do the full operation in fewer steps.
11666 Although all vector elements of the result but the first are ignored, we
11667 actually calculate the same result in each of the elements. An alternative
11668 such as initially loading a vector with zero to use as each of the second
11669 operands would use up an additional register and take an extra instruction,
11670 for no particular gain. */
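/* For example, reducing a four-element vector takes two pairwise steps:
   the loop starts with i = parts / 2 = 2 and halves it each iteration,
   writing the final result into OP0 on the last step.  */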
11672 void
11673 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
11674 rtx (*reduc) (rtx, rtx, rtx))
11676 enum machine_mode inner = GET_MODE_INNER (mode);
11677 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
11678 rtx tmpsum = op1;
11680 for (i = parts / 2; i >= 1; i /= 2)
11682 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11683 emit_insn (reduc (dest, tmpsum, tmpsum));
11684 tmpsum = dest;
11688 /* If VALS is a vector constant that can be loaded into a register
11689 using VDUP, generate instructions to do so and return an RTX to
11690 assign to the register. Otherwise return NULL_RTX. */
11692 static rtx
11693 neon_vdup_constant (rtx vals)
11695 enum machine_mode mode = GET_MODE (vals);
11696 enum machine_mode inner_mode = GET_MODE_INNER (mode);
11697 int n_elts = GET_MODE_NUNITS (mode);
11698 bool all_same = true;
11699 rtx x;
11700 int i;
11702 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
11703 return NULL_RTX;
11705 for (i = 0; i < n_elts; ++i)
11707 x = XVECEXP (vals, 0, i);
11708 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11709 all_same = false;
11712 if (!all_same)
11713 /* The elements are not all the same. We could handle repeating
11714 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11715 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11716 vdup.i16). */
11717 return NULL_RTX;
11719 /* We can load this constant by using VDUP and a constant in a
11720 single ARM register. This will be cheaper than a vector
11721 load. */
11723 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
11724 return gen_rtx_VEC_DUPLICATE (mode, x);
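/* An illustrative example, added for exposition (register choices are
   arbitrary): given a CONST_VECTOR whose four SI elements are all equal,
   the code above copies that element into a core register and returns
   (vec_duplicate:V4SI (reg:SI rN)), which the move patterns can emit as
   a single "vdup.32 qM, rN".  */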
11727 /* Generate code to load VALS, which is a PARALLEL containing only
11728 constants (for vec_init) or CONST_VECTOR, efficiently into a
11729 register. Returns an RTX to copy into the register, or NULL_RTX
11730 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
11732 rtx
11733 neon_make_constant (rtx vals)
11735 enum machine_mode mode = GET_MODE (vals);
11736 rtx target;
11737 rtx const_vec = NULL_RTX;
11738 int n_elts = GET_MODE_NUNITS (mode);
11739 int n_const = 0;
11740 int i;
11742 if (GET_CODE (vals) == CONST_VECTOR)
11743 const_vec = vals;
11744 else if (GET_CODE (vals) == PARALLEL)
11746 /* A CONST_VECTOR must contain only CONST_INTs and
11747 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11748 Only store valid constants in a CONST_VECTOR. */
11749 for (i = 0; i < n_elts; ++i)
11751 rtx x = XVECEXP (vals, 0, i);
11752 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11753 n_const++;
11755 if (n_const == n_elts)
11756 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11758 else
11759 gcc_unreachable ();
11761 if (const_vec != NULL
11762 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
11763 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11764 return const_vec;
11765 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
11766 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11767 pipeline cycle; creating the constant takes one or two ARM
11768 pipeline cycles. */
11769 return target;
11770 else if (const_vec != NULL_RTX)
11771 /* Load from constant pool. On Cortex-A8 this takes two cycles
11772 (for either double or quad vectors). We cannot take advantage
11773 of single-cycle VLD1 because we need a PC-relative addressing
11774 mode. */
11775 return const_vec;
11776 else
11777 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11778 We cannot construct an initializer. */
11779 return NULL_RTX;
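/* An illustrative example, added for exposition: under the ladder above,
   a V4SI constant of all zeros is returned as-is and becomes a single
   VMOV immediate; a splat of a value VMOV cannot encode (for instance
   0x12345678 in every element) goes through neon_vdup_constant; a
   non-uniform constant such as {1, 2, 3, 4} is loaded from the constant
   pool; and a PARALLEL that mentions a register yields NULL_RTX.  */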
11782 /* Initialize vector TARGET to VALS. */
11784 void
11785 neon_expand_vector_init (rtx target, rtx vals)
11787 enum machine_mode mode = GET_MODE (target);
11788 enum machine_mode inner_mode = GET_MODE_INNER (mode);
11789 int n_elts = GET_MODE_NUNITS (mode);
11790 int n_var = 0, one_var = -1;
11791 bool all_same = true;
11792 rtx x, mem;
11793 int i;
11795 for (i = 0; i < n_elts; ++i)
11797 x = XVECEXP (vals, 0, i);
11798 if (!CONSTANT_P (x))
11799 ++n_var, one_var = i;
11801 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11802 all_same = false;
11805 if (n_var == 0)
11807 rtx constant = neon_make_constant (vals);
11808 if (constant != NULL_RTX)
11810 emit_move_insn (target, constant);
11811 return;
11815 /* Splat a single non-constant element if we can. */
11816 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
11818 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
11819 emit_insn (gen_rtx_SET (VOIDmode, target,
11820 gen_rtx_VEC_DUPLICATE (mode, x)));
11821 return;
11824 /* One field is non-constant. Load constant then overwrite varying
11825 field. This is more efficient than using the stack. */
11826 if (n_var == 1)
11828 rtx copy = copy_rtx (vals);
11829 rtx index = GEN_INT (one_var);
11831 /* Load constant part of vector, substitute neighboring value for
11832 varying element. */
11833 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
11834 neon_expand_vector_init (target, copy);
11836 /* Insert variable. */
11837 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
11838 switch (mode)
11840 case V8QImode:
11841 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
11842 break;
11843 case V16QImode:
11844 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
11845 break;
11846 case V4HImode:
11847 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
11848 break;
11849 case V8HImode:
11850 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
11851 break;
11852 case V2SImode:
11853 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
11854 break;
11855 case V4SImode:
11856 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
11857 break;
11858 case V2SFmode:
11859 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
11860 break;
11861 case V4SFmode:
11862 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
11863 break;
11864 case V2DImode:
11865 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
11866 break;
11867 default:
11868 gcc_unreachable ();
11870 return;
11873 /* Construct the vector in memory one field at a time
11874 and load the whole vector. */
11875 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11876 for (i = 0; i < n_elts; i++)
11877 emit_move_insn (adjust_address_nv (mem, inner_mode,
11878 i * GET_MODE_SIZE (inner_mode)),
11879 XVECEXP (vals, 0, i));
11880 emit_move_insn (target, mem);
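/* An illustrative example, added for exposition (the variable name is
   arbitrary): for a V4SI initializer {1, 2, x, 4} with one non-constant
   element, the n_var == 1 path above first builds the constant vector
   {1, 2, 4, 4} (the varying slot borrows its neighbour's value), then
   overwrites lane 2 with x via gen_neon_vset_lanev4si, avoiding the
   store-and-reload fallback at the end of the function.  */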
11883 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
11884 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
11885 reported source locations are bogus. */
11887 static void
11888 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11889 const char *err)
11891 HOST_WIDE_INT lane;
11893 gcc_assert (CONST_INT_P (operand));
11895 lane = INTVAL (operand);
11897 if (lane < low || lane >= high)
11898 error (err);
11901 /* Bounds-check lanes. */
11903 void
11904 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
11906 bounds_check (operand, low, high, "lane out of range");
11909 /* Bounds-check constants. */
11911 void
11912 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
11914 bounds_check (operand, low, high, "constant out of range");
11917 HOST_WIDE_INT
11918 neon_element_bits (enum machine_mode mode)
11920 if (mode == DImode)
11921 return GET_MODE_BITSIZE (mode);
11922 else
11923 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
11927 /* Predicates for `match_operand' and `match_operator'. */
11929 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11930 WB is true if full writeback address modes are allowed and is false
11931 if limited writeback address modes (POST_INC and PRE_DEC) are
11932 allowed. */
11935 arm_coproc_mem_operand (rtx op, bool wb)
11937 rtx ind;
11939 /* Reject eliminable registers. */
11940 if (! (reload_in_progress || reload_completed)
11941 && ( reg_mentioned_p (frame_pointer_rtx, op)
11942 || reg_mentioned_p (arg_pointer_rtx, op)
11943 || reg_mentioned_p (virtual_incoming_args_rtx, op)
11944 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
11945 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
11946 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
11947 return FALSE;
11949 /* Constants are converted into offsets from labels. */
11950 if (!MEM_P (op))
11951 return FALSE;
11953 ind = XEXP (op, 0);
11955 if (reload_completed
11956 && (GET_CODE (ind) == LABEL_REF
11957 || (GET_CODE (ind) == CONST
11958 && GET_CODE (XEXP (ind, 0)) == PLUS
11959 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
11960 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
11961 return TRUE;
11963 /* Match: (mem (reg)). */
11964 if (REG_P (ind))
11965 return arm_address_register_rtx_p (ind, 0);
11967 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
11968 acceptable in any case (subject to verification by
11969 arm_address_register_rtx_p). We need WB to be true to accept
11970 PRE_INC and POST_DEC. */
11971 if (GET_CODE (ind) == POST_INC
11972 || GET_CODE (ind) == PRE_DEC
11973 || (wb
11974 && (GET_CODE (ind) == PRE_INC
11975 || GET_CODE (ind) == POST_DEC)))
11976 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
11978 if (wb
11979 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
11980 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
11981 && GET_CODE (XEXP (ind, 1)) == PLUS
11982 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
11983 ind = XEXP (ind, 1);
11985 /* Match:
11986 (plus (reg)
11987 (const)). */
11988 if (GET_CODE (ind) == PLUS
11989 && REG_P (XEXP (ind, 0))
11990 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
11991 && CONST_INT_P (XEXP (ind, 1))
11992 && INTVAL (XEXP (ind, 1)) > -1024
11993 && INTVAL (XEXP (ind, 1)) < 1024
11994 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
11995 return TRUE;
11997 return FALSE;
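/* An illustrative example, added for exposition (register numbers are
   arbitrary): the checks above accept addresses such as (mem (reg r4)),
   (mem (post_inc (reg r4))) and (mem (plus (reg r4) (const_int 8))),
   while an offset that is out of range or not a multiple of 4 is
   rejected, corresponding to the -1020..1020 word-aligned offsets that
   VLDR/VSTR can encode.  */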
12000 /* Return TRUE if OP is a memory operand which we can load or store a vector
12001 to/from. TYPE is one of the following values:
12002 0 - Vector load/store (vldr)
12003 1 - Core registers (ldm)
12004 2 - Element/structure loads (vld1)
12007 neon_vector_mem_operand (rtx op, int type, bool strict)
12009 rtx ind;
12011 /* Reject eliminable registers. */
12012 if (! (reload_in_progress || reload_completed)
12013 && ( reg_mentioned_p (frame_pointer_rtx, op)
12014 || reg_mentioned_p (arg_pointer_rtx, op)
12015 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12016 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12017 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12018 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12019 return !strict;
12021 /* Constants are converted into offsets from labels. */
12022 if (!MEM_P (op))
12023 return FALSE;
12025 ind = XEXP (op, 0);
12027 if (reload_completed
12028 && (GET_CODE (ind) == LABEL_REF
12029 || (GET_CODE (ind) == CONST
12030 && GET_CODE (XEXP (ind, 0)) == PLUS
12031 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12032 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12033 return TRUE;
12035 /* Match: (mem (reg)). */
12036 if (REG_P (ind))
12037 return arm_address_register_rtx_p (ind, 0);
12039 /* Allow post-increment with Neon registers. */
12040 if ((type != 1 && GET_CODE (ind) == POST_INC)
12041 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12042 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12044 /* FIXME: vld1 allows register post-modify. */
12046 /* Match:
12047 (plus (reg)
12048 (const)). */
12049 if (type == 0
12050 && GET_CODE (ind) == PLUS
12051 && REG_P (XEXP (ind, 0))
12052 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12053 && CONST_INT_P (XEXP (ind, 1))
12054 && INTVAL (XEXP (ind, 1)) > -1024
12055 /* For quad modes, we restrict the constant offset to be slightly less
12056 than what the instruction format permits. We have no such constraint
12057 on double mode offsets. (This must match arm_legitimate_index_p.) */
12058 && (INTVAL (XEXP (ind, 1))
12059 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12060 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12061 return TRUE;
12063 return FALSE;
12066 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12067 type. */
12069 neon_struct_mem_operand (rtx op)
12071 rtx ind;
12073 /* Reject eliminable registers. */
12074 if (! (reload_in_progress || reload_completed)
12075 && ( reg_mentioned_p (frame_pointer_rtx, op)
12076 || reg_mentioned_p (arg_pointer_rtx, op)
12077 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12078 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12079 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12080 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12081 return FALSE;
12083 /* Constants are converted into offsets from labels. */
12084 if (!MEM_P (op))
12085 return FALSE;
12087 ind = XEXP (op, 0);
12089 if (reload_completed
12090 && (GET_CODE (ind) == LABEL_REF
12091 || (GET_CODE (ind) == CONST
12092 && GET_CODE (XEXP (ind, 0)) == PLUS
12093 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12094 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12095 return TRUE;
12097 /* Match: (mem (reg)). */
12098 if (REG_P (ind))
12099 return arm_address_register_rtx_p (ind, 0);
12101 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12102 if (GET_CODE (ind) == POST_INC
12103 || GET_CODE (ind) == PRE_DEC)
12104 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12106 return FALSE;
12109 /* Return true if X is a register that will be eliminated later on. */
12111 arm_eliminable_register (rtx x)
12113 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12114 || REGNO (x) == ARG_POINTER_REGNUM
12115 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12116 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12119 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12120 coprocessor registers. Otherwise return NO_REGS. */
12122 enum reg_class
12123 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12125 if (mode == HFmode)
12127 if (!TARGET_NEON_FP16)
12128 return GENERAL_REGS;
12129 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12130 return NO_REGS;
12131 return GENERAL_REGS;
12134 /* The neon move patterns handle all legitimate vector and struct
12135 addresses. */
12136 if (TARGET_NEON
12137 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12138 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12139 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12140 || VALID_NEON_STRUCT_MODE (mode)))
12141 return NO_REGS;
12143 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12144 return NO_REGS;
12146 return GENERAL_REGS;
12149 /* Values which must be returned in the most-significant end of the return
12150 register. */
12152 static bool
12153 arm_return_in_msb (const_tree valtype)
12155 return (TARGET_AAPCS_BASED
12156 && BYTES_BIG_ENDIAN
12157 && (AGGREGATE_TYPE_P (valtype)
12158 || TREE_CODE (valtype) == COMPLEX_TYPE
12159 || FIXED_POINT_TYPE_P (valtype)));
12162 /* Return TRUE if X references a SYMBOL_REF. */
12164 symbol_mentioned_p (rtx x)
12166 const char * fmt;
12167 int i;
12169 if (GET_CODE (x) == SYMBOL_REF)
12170 return 1;
12172 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12173 are constant offsets, not symbols. */
12174 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12175 return 0;
12177 fmt = GET_RTX_FORMAT (GET_CODE (x));
12179 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12181 if (fmt[i] == 'E')
12183 int j;
12185 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12186 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12187 return 1;
12189 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12190 return 1;
12193 return 0;
12196 /* Return TRUE if X references a LABEL_REF. */
12198 label_mentioned_p (rtx x)
12200 const char * fmt;
12201 int i;
12203 if (GET_CODE (x) == LABEL_REF)
12204 return 1;
12206 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12207 instruction, but they are constant offsets, not symbols. */
12208 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12209 return 0;
12211 fmt = GET_RTX_FORMAT (GET_CODE (x));
12212 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12214 if (fmt[i] == 'E')
12216 int j;
12218 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12219 if (label_mentioned_p (XVECEXP (x, i, j)))
12220 return 1;
12222 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12223 return 1;
12226 return 0;
12230 tls_mentioned_p (rtx x)
12232 switch (GET_CODE (x))
12234 case CONST:
12235 return tls_mentioned_p (XEXP (x, 0));
12237 case UNSPEC:
12238 if (XINT (x, 1) == UNSPEC_TLS)
12239 return 1;
12241 default:
12242 return 0;
12246 /* Must not copy any rtx that uses a pc-relative address. */
12248 static int
12249 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12251 if (GET_CODE (*x) == UNSPEC
12252 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12253 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12254 return 1;
12255 return 0;
12258 static bool
12259 arm_cannot_copy_insn_p (rtx insn)
12261 /* The tls call insn cannot be copied, as it is paired with a data
12262 word. */
12263 if (recog_memoized (insn) == CODE_FOR_tlscall)
12264 return true;
12266 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12269 enum rtx_code
12270 minmax_code (rtx x)
12272 enum rtx_code code = GET_CODE (x);
12274 switch (code)
12276 case SMAX:
12277 return GE;
12278 case SMIN:
12279 return LE;
12280 case UMIN:
12281 return LEU;
12282 case UMAX:
12283 return GEU;
12284 default:
12285 gcc_unreachable ();
12289 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12291 bool
12292 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12293 int *mask, bool *signed_sat)
12295 /* The high bound must be a power of two minus one. */
12296 int log = exact_log2 (INTVAL (hi_bound) + 1);
12297 if (log == -1)
12298 return false;
12300 /* The low bound is either zero (for usat) or one less than the
12301 negation of the high bound (for ssat). */
12302 if (INTVAL (lo_bound) == 0)
12304 if (mask)
12305 *mask = log;
12306 if (signed_sat)
12307 *signed_sat = false;
12309 return true;
12312 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12314 if (mask)
12315 *mask = log + 1;
12316 if (signed_sat)
12317 *signed_sat = true;
12319 return true;
12322 return false;
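/* An illustrative example, added for exposition: clamping to [0, 255]
   matches the unsigned case above with *MASK == 8, since
   exact_log2 (255 + 1) == 8 and the low bound is zero (usat #8);
   clamping to [-128, 127] matches the signed case with *MASK == 8,
   since -128 == -127 - 1 (ssat #8).  Bounds such as [0, 100] fail the
   power-of-two test and the function returns false.  */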
12325 /* Return 1 if memory locations are adjacent. */
12327 adjacent_mem_locations (rtx a, rtx b)
12329 /* We don't guarantee to preserve the order of these memory refs. */
12330 if (volatile_refs_p (a) || volatile_refs_p (b))
12331 return 0;
12333 if ((REG_P (XEXP (a, 0))
12334 || (GET_CODE (XEXP (a, 0)) == PLUS
12335 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12336 && (REG_P (XEXP (b, 0))
12337 || (GET_CODE (XEXP (b, 0)) == PLUS
12338 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12340 HOST_WIDE_INT val0 = 0, val1 = 0;
12341 rtx reg0, reg1;
12342 int val_diff;
12344 if (GET_CODE (XEXP (a, 0)) == PLUS)
12346 reg0 = XEXP (XEXP (a, 0), 0);
12347 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12349 else
12350 reg0 = XEXP (a, 0);
12352 if (GET_CODE (XEXP (b, 0)) == PLUS)
12354 reg1 = XEXP (XEXP (b, 0), 0);
12355 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12357 else
12358 reg1 = XEXP (b, 0);
12360 /* Don't accept any offset that will require multiple
12361 instructions to handle, since this would cause the
12362 arith_adjacentmem pattern to output an overlong sequence. */
12363 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12364 return 0;
12366 /* Don't allow an eliminable register: register elimination can make
12367 the offset too large. */
12368 if (arm_eliminable_register (reg0))
12369 return 0;
12371 val_diff = val1 - val0;
12373 if (arm_ld_sched)
12375 /* If the target has load delay slots, then there's no benefit
12376 to using an ldm instruction unless the offset is zero and
12377 we are optimizing for size. */
12378 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12379 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12380 && (val_diff == 4 || val_diff == -4));
12383 return ((REGNO (reg0) == REGNO (reg1))
12384 && (val_diff == 4 || val_diff == -4));
12387 return 0;
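/* An illustrative example, added for exposition (register numbers are
   arbitrary): on a core without load delay slots, [r3] and [r3, #4] are
   adjacent, as are [r3, #8] and [r3, #4]; [r3] paired with [r3, #8], or
   any pair using different base registers, is not.  On arm_ld_sched
   cores the stricter optimize_size test above is applied instead.  */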
12390 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12391 for load operations, false for store operations. CONSECUTIVE is true
12392 if the register numbers in the operation must be consecutive in the register
12393 bank. RETURN_PC is true if the value is to be loaded into the PC.
12394 The pattern we are trying to match for load is:
12395 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12396 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12399 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12401 where
12402 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12403 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12404 3. If consecutive is TRUE, then for kth register being loaded,
12405 REGNO (R_dk) = REGNO (R_d0) + k.
12406 The pattern for store is similar. */
12407 bool
12408 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12409 bool consecutive, bool return_pc)
12411 HOST_WIDE_INT count = XVECLEN (op, 0);
12412 rtx reg, mem, addr;
12413 unsigned regno;
12414 unsigned first_regno;
12415 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12416 rtx elt;
12417 bool addr_reg_in_reglist = false;
12418 bool update = false;
12419 int reg_increment;
12420 int offset_adj;
12421 int regs_per_val;
12423 /* If not in SImode, then registers must be consecutive
12424 (e.g., VLDM instructions for DFmode). */
12425 gcc_assert ((mode == SImode) || consecutive);
12426 /* Setting return_pc for stores is illegal. */
12427 gcc_assert (!return_pc || load);
12429 /* Set up the increments and the regs per val based on the mode. */
12430 reg_increment = GET_MODE_SIZE (mode);
12431 regs_per_val = reg_increment / 4;
12432 offset_adj = return_pc ? 1 : 0;
12434 if (count <= 1
12435 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12436 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12437 return false;
12439 /* Check if this is a write-back. */
12440 elt = XVECEXP (op, 0, offset_adj);
12441 if (GET_CODE (SET_SRC (elt)) == PLUS)
12443 i++;
12444 base = 1;
12445 update = true;
12447 /* The offset adjustment must be the number of registers being
12448 popped times the size of a single register. */
12449 if (!REG_P (SET_DEST (elt))
12450 || !REG_P (XEXP (SET_SRC (elt), 0))
12451 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12452 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12453 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12454 ((count - 1 - offset_adj) * reg_increment))
12455 return false;
12458 i = i + offset_adj;
12459 base = base + offset_adj;
12460 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12461 success depends on the type: VLDM can do just one reg,
12462 LDM must do at least two. */
12463 if ((count <= i) && (mode == SImode))
12464 return false;
12466 elt = XVECEXP (op, 0, i - 1);
12467 if (GET_CODE (elt) != SET)
12468 return false;
12470 if (load)
12472 reg = SET_DEST (elt);
12473 mem = SET_SRC (elt);
12475 else
12477 reg = SET_SRC (elt);
12478 mem = SET_DEST (elt);
12481 if (!REG_P (reg) || !MEM_P (mem))
12482 return false;
12484 regno = REGNO (reg);
12485 first_regno = regno;
12486 addr = XEXP (mem, 0);
12487 if (GET_CODE (addr) == PLUS)
12489 if (!CONST_INT_P (XEXP (addr, 1)))
12490 return false;
12492 offset = INTVAL (XEXP (addr, 1));
12493 addr = XEXP (addr, 0);
12496 if (!REG_P (addr))
12497 return false;
12499 /* Don't allow SP to be loaded unless it is also the base register. It
12500 guarantees that SP is reset correctly when an LDM instruction
12501 is interrupted. Otherwise, we might end up with a corrupt stack. */
12502 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12503 return false;
12505 for (; i < count; i++)
12507 elt = XVECEXP (op, 0, i);
12508 if (GET_CODE (elt) != SET)
12509 return false;
12511 if (load)
12513 reg = SET_DEST (elt);
12514 mem = SET_SRC (elt);
12516 else
12518 reg = SET_SRC (elt);
12519 mem = SET_DEST (elt);
12522 if (!REG_P (reg)
12523 || GET_MODE (reg) != mode
12524 || REGNO (reg) <= regno
12525 || (consecutive
12526 && (REGNO (reg) !=
12527 (unsigned int) (first_regno + regs_per_val * (i - base))))
12528 /* Don't allow SP to be loaded unless it is also the base register. It
12529 guarantees that SP is reset correctly when an LDM instruction
12530 is interrupted. Otherwise, we might end up with a corrupt stack. */
12531 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12532 || !MEM_P (mem)
12533 || GET_MODE (mem) != mode
12534 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12535 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12536 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12537 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12538 offset + (i - base) * reg_increment))
12539 && (!REG_P (XEXP (mem, 0))
12540 || offset + (i - base) * reg_increment != 0)))
12541 return false;
12543 regno = REGNO (reg);
12544 if (regno == REGNO (addr))
12545 addr_reg_in_reglist = true;
12548 if (load)
12550 if (update && addr_reg_in_reglist)
12551 return false;
12553 /* For Thumb-1, the address register is always modified - either by write-back
12554 or by an explicit load. If the pattern does not describe an update,
12555 then the address register must be in the list of loaded registers. */
12556 if (TARGET_THUMB1)
12557 return update || addr_reg_in_reglist;
12560 return true;
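/* An illustrative example, added for exposition (register numbers are
   arbitrary): the kind of PARALLEL accepted above for an SImode load
   with write-back, i.e. "ldmia r0!, {r4, r5}", looks roughly like

	(parallel [(set (reg r0) (plus (reg r0) (const_int 8)))
		   (set (reg r4) (mem (reg r0)))
		   (set (reg r5) (mem (plus (reg r0) (const_int 4))))])

   with strictly increasing destination register numbers.  */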
12563 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12564 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12565 instruction. ADD_OFFSET is nonzero if the base address register needs
12566 to be modified with an add instruction before we can use it. */
12568 static bool
12569 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12570 int nops, HOST_WIDE_INT add_offset)
12572 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12573 if the offset isn't small enough. The reason 2 ldrs are faster
12574 is because these ARMs are able to do more than one cache access
12575 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12576 whilst the ARM8 has a double bandwidth cache. This means that
12577 these cores can do both an instruction fetch and a data fetch in
12578 a single cycle, so the trick of calculating the address into a
12579 scratch register (one of the result regs) and then doing a load
12580 multiple actually becomes slower (and no smaller in code size).
12581 That is the transformation
12583 ldr rd1, [rbase + offset]
12584 ldr rd2, [rbase + offset + 4]
12586 to
12588 add rd1, rbase, offset
12589 ldmia rd1, {rd1, rd2}
12591 produces worse code -- '3 cycles + any stalls on rd2' instead of
12592 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12593 access per cycle, the first sequence could never complete in less
12594 than 6 cycles, whereas the ldm sequence would only take 5 and
12595 would make better use of sequential accesses if not hitting the
12596 cache.
12598 We cheat here and test 'arm_ld_sched' which we currently know to
12599 only be true for the ARM8, ARM9 and StrongARM. If this ever
12600 changes, then the test below needs to be reworked. */
12601 if (nops == 2 && arm_ld_sched && add_offset != 0)
12602 return false;
12604 /* XScale has load-store double instructions, but they have stricter
12605 alignment requirements than load-store multiple, so we cannot
12606 use them.
12608 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12609 the pipeline until completion.
12611 NREGS CYCLES
12612 1 3
12613 2 4
12614 3 5
12615 4 6
12617 An ldr instruction takes 1-3 cycles, but does not block the
12618 pipeline.
12620 NREGS CYCLES
12621 1 1-3
12622 2 2-6
12623 3 3-9
12624 4 4-12
12626 Best case ldr will always win. However, the more ldr instructions
12627 we issue, the less likely we are to be able to schedule them well.
12628 Using ldr instructions also increases code size.
12630 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12631 for counts of 3 or 4 regs. */
12632 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12633 return false;
12634 return true;
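/* An illustrative example, added for exposition: on an arm_ld_sched
   core, rewriting two ldr instructions whose base needs adjusting as
   "add; ldmia" turns the 2-cycle sequence described above into a
   3-cycle one, hence the nops == 2 && add_offset != 0 bail-out; on
   XScale two loads finish in at most 6 non-blocking cycles as ldrs but
   block for 4 cycles as an ldm, hence the second bail-out unless
   optimizing for size.  */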
12637 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12638 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12639 an array ORDER which describes the sequence to use when accessing the
12640 offsets that produces an ascending order. In this sequence, each
12641 offset must be larger by exactly 4 than the previous one. ORDER[0]
12642 must have been filled in with the lowest offset by the caller.
12643 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12644 we use to verify that ORDER produces an ascending order of registers.
12645 Return true if it was possible to construct such an order, false if
12646 not. */
12648 static bool
12649 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12650 int *unsorted_regs)
12652 int i;
12653 for (i = 1; i < nops; i++)
12655 int j;
12657 order[i] = order[i - 1];
12658 for (j = 0; j < nops; j++)
12659 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12661 /* We must find exactly one offset that is higher than the
12662 previous one by 4. */
12663 if (order[i] != order[i - 1])
12664 return false;
12665 order[i] = j;
12667 if (order[i] == order[i - 1])
12668 return false;
12669 /* The register numbers must be ascending. */
12670 if (unsorted_regs != NULL
12671 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12672 return false;
12674 return true;
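/* An illustrative example, added for exposition: given
   UNSORTED_OFFSETS = {8, 4, 0, 12} and ORDER[0] = 2 (the slot holding
   offset 0), the loop above fills ORDER with {2, 1, 0, 3}, i.e. the
   offsets 0, 4, 8, 12 in ascending steps of 4.  A gap in the offsets,
   say {0, 4, 12, 16}, makes the function return false.  */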
12677 /* Used to determine in a peephole whether a sequence of load
12678 instructions can be changed into a load-multiple instruction.
12679 NOPS is the number of separate load instructions we are examining. The
12680 first NOPS entries in OPERANDS are the destination registers, the
12681 next NOPS entries are memory operands. If this function is
12682 successful, *BASE is set to the common base register of the memory
12683 accesses; *LOAD_OFFSET is set to the first memory location's offset
12684 from that base register.
12685 REGS is an array filled in with the destination register numbers.
12686 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
12687 insn numbers to an ascending order of loads. If CHECK_REGS is true,
12688 the sequence of registers in REGS matches the loads from ascending memory
12689 locations, and the function verifies that the register numbers are
12690 themselves ascending. If CHECK_REGS is false, the register numbers
12691 are stored in the order they are found in the operands. */
12692 static int
12693 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
12694 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
12696 int unsorted_regs[MAX_LDM_STM_OPS];
12697 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12698 int order[MAX_LDM_STM_OPS];
12699 rtx base_reg_rtx = NULL;
12700 int base_reg = -1;
12701 int i, ldm_case;
12703 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12704 easily extended if required. */
12705 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12707 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12709 /* Loop over the operands and check that the memory references are
12710 suitable (i.e. immediate offsets from the same base register). At
12711 the same time, extract the target register, and the memory
12712 offsets. */
12713 for (i = 0; i < nops; i++)
12715 rtx reg;
12716 rtx offset;
12718 /* Convert a subreg of a mem into the mem itself. */
12719 if (GET_CODE (operands[nops + i]) == SUBREG)
12720 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12722 gcc_assert (MEM_P (operands[nops + i]));
12724 /* Don't reorder volatile memory references; it doesn't seem worth
12725 looking for the case where the order is ok anyway. */
12726 if (MEM_VOLATILE_P (operands[nops + i]))
12727 return 0;
12729 offset = const0_rtx;
12731 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12732 || (GET_CODE (reg) == SUBREG
12733 && REG_P (reg = SUBREG_REG (reg))))
12734 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12735 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12736 || (GET_CODE (reg) == SUBREG
12737 && REG_P (reg = SUBREG_REG (reg))))
12738 && (CONST_INT_P (offset
12739 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12741 if (i == 0)
12743 base_reg = REGNO (reg);
12744 base_reg_rtx = reg;
12745 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12746 return 0;
12748 else if (base_reg != (int) REGNO (reg))
12749 /* Not addressed from the same base register. */
12750 return 0;
12752 unsorted_regs[i] = (REG_P (operands[i])
12753 ? REGNO (operands[i])
12754 : REGNO (SUBREG_REG (operands[i])));
12756 /* If it isn't an integer register, or if it overwrites the
12757 base register but isn't the last insn in the list, then
12758 we can't do this. */
12759 if (unsorted_regs[i] < 0
12760 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12761 || unsorted_regs[i] > 14
12762 || (i != nops - 1 && unsorted_regs[i] == base_reg))
12763 return 0;
12765 /* Don't allow SP to be loaded unless it is also the base
12766 register. It guarantees that SP is reset correctly when
12767 an LDM instruction is interrupted. Otherwise, we might
12768 end up with a corrupt stack. */
12769 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
12770 return 0;
12772 unsorted_offsets[i] = INTVAL (offset);
12773 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12774 order[0] = i;
12776 else
12777 /* Not a suitable memory address. */
12778 return 0;
12781 /* All the useful information has now been extracted from the
12782 operands into unsorted_regs and unsorted_offsets; additionally,
12783 order[0] has been set to the lowest offset in the list. Sort
12784 the offsets into order, verifying that they are adjacent, and
12785 check that the register numbers are ascending. */
12786 if (!compute_offset_order (nops, unsorted_offsets, order,
12787 check_regs ? unsorted_regs : NULL))
12788 return 0;
12790 if (saved_order)
12791 memcpy (saved_order, order, sizeof order);
12793 if (base)
12795 *base = base_reg;
12797 for (i = 0; i < nops; i++)
12798 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12800 *load_offset = unsorted_offsets[order[0]];
12803 if (TARGET_THUMB1
12804 && !peep2_reg_dead_p (nops, base_reg_rtx))
12805 return 0;
12807 if (unsorted_offsets[order[0]] == 0)
12808 ldm_case = 1; /* ldmia */
12809 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12810 ldm_case = 2; /* ldmib */
12811 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12812 ldm_case = 3; /* ldmda */
12813 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12814 ldm_case = 4; /* ldmdb */
12815 else if (const_ok_for_arm (unsorted_offsets[order[0]])
12816 || const_ok_for_arm (-unsorted_offsets[order[0]]))
12817 ldm_case = 5;
12818 else
12819 return 0;
12821 if (!multiple_operation_profitable_p (false, nops,
12822 ldm_case == 5
12823 ? unsorted_offsets[order[0]] : 0))
12824 return 0;
12826 return ldm_case;
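/* An illustrative example, added for exposition (register numbers are
   arbitrary): loads from [r2], [r2, #4], [r2, #8] give case 1 (ldmia);
   on ARM, a lowest offset of 4 gives case 2 (ldmib) and a highest
   offset of 0 gives case 3 (ldmda); a highest offset of -4 gives case 4
   (ldmdb); any other base offset reachable by an immediate add gives
   case 5, which callers such as gen_ldm_seq handle by first adding the
   offset into a scratch register.  */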
12829 /* Used to determine in a peephole whether a sequence of store instructions can
12830 be changed into a store-multiple instruction.
12831 NOPS is the number of separate store instructions we are examining.
12832 NOPS_TOTAL is the total number of instructions recognized by the peephole
12833 pattern.
12834 The first NOPS entries in OPERANDS are the source registers, the next
12835 NOPS entries are memory operands. If this function is successful, *BASE is
12836 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12837 to the first memory location's offset from that base register. REGS is an
12838 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12839 likewise filled with the corresponding rtx's.
12840 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
12841 numbers to an ascending order of stores.
12842 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12843 from ascending memory locations, and the function verifies that the register
12844 numbers are themselves ascending. If CHECK_REGS is false, the register
12845 numbers are stored in the order they are found in the operands. */
12846 static int
12847 store_multiple_sequence (rtx *operands, int nops, int nops_total,
12848 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
12849 HOST_WIDE_INT *load_offset, bool check_regs)
12851 int unsorted_regs[MAX_LDM_STM_OPS];
12852 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
12853 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12854 int order[MAX_LDM_STM_OPS];
12855 int base_reg = -1;
12856 rtx base_reg_rtx = NULL;
12857 int i, stm_case;
12859 /* Write back of base register is currently only supported for Thumb 1. */
12860 int base_writeback = TARGET_THUMB1;
12862 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12863 easily extended if required. */
12864 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12866 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12868 /* Loop over the operands and check that the memory references are
12869 suitable (i.e. immediate offsets from the same base register). At
12870 the same time, extract the target register, and the memory
12871 offsets. */
12872 for (i = 0; i < nops; i++)
12874 rtx reg;
12875 rtx offset;
12877 /* Convert a subreg of a mem into the mem itself. */
12878 if (GET_CODE (operands[nops + i]) == SUBREG)
12879 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12881 gcc_assert (MEM_P (operands[nops + i]));
12883 /* Don't reorder volatile memory references; it doesn't seem worth
12884 looking for the case where the order is ok anyway. */
12885 if (MEM_VOLATILE_P (operands[nops + i]))
12886 return 0;
12888 offset = const0_rtx;
12890 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12891 || (GET_CODE (reg) == SUBREG
12892 && REG_P (reg = SUBREG_REG (reg))))
12893 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12894 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12895 || (GET_CODE (reg) == SUBREG
12896 && REG_P (reg = SUBREG_REG (reg))))
12897 && (CONST_INT_P (offset
12898 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12900 unsorted_reg_rtxs[i] = (REG_P (operands[i])
12901 ? operands[i] : SUBREG_REG (operands[i]));
12902 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
12904 if (i == 0)
12906 base_reg = REGNO (reg);
12907 base_reg_rtx = reg;
12908 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12909 return 0;
12911 else if (base_reg != (int) REGNO (reg))
12912 /* Not addressed from the same base register. */
12913 return 0;
12915 /* If it isn't an integer register, then we can't do this. */
12916 if (unsorted_regs[i] < 0
12917 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12918 /* The effects are unpredictable if the base register is
12919 both updated and stored. */
12920 || (base_writeback && unsorted_regs[i] == base_reg)
12921 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
12922 || unsorted_regs[i] > 14)
12923 return 0;
12925 unsorted_offsets[i] = INTVAL (offset);
12926 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
12927 order[0] = i;
12929 else
12930 /* Not a suitable memory address. */
12931 return 0;
12934 /* All the useful information has now been extracted from the
12935 operands into unsorted_regs and unsorted_offsets; additionally,
12936 order[0] has been set to the lowest offset in the list. Sort
12937 the offsets into order, verifying that they are adjacent, and
12938 check that the register numbers are ascending. */
12939 if (!compute_offset_order (nops, unsorted_offsets, order,
12940 check_regs ? unsorted_regs : NULL))
12941 return 0;
12943 if (saved_order)
12944 memcpy (saved_order, order, sizeof order);
12946 if (base)
12948 *base = base_reg;
12950 for (i = 0; i < nops; i++)
12952 regs[i] = unsorted_regs[check_regs ? order[i] : i];
12953 if (reg_rtxs)
12954 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
12957 *load_offset = unsorted_offsets[order[0]];
12960 if (TARGET_THUMB1
12961 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
12962 return 0;
12964 if (unsorted_offsets[order[0]] == 0)
12965 stm_case = 1; /* stmia */
12966 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
12967 stm_case = 2; /* stmib */
12968 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
12969 stm_case = 3; /* stmda */
12970 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
12971 stm_case = 4; /* stmdb */
12972 else
12973 return 0;
12975 if (!multiple_operation_profitable_p (false, nops, 0))
12976 return 0;
12978 return stm_case;
12981 /* Routines for use in generating RTL. */
12983 /* Generate a load-multiple instruction. COUNT is the number of loads in
12984 the instruction; REGS and MEMS are arrays containing the operands.
12985 BASEREG is the base register to be used in addressing the memory operands.
12986 WBACK_OFFSET is nonzero if the instruction should update the base
12987 register. */
12989 static rtx
12990 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
12991 HOST_WIDE_INT wback_offset)
12993 int i = 0, j;
12994 rtx result;
12996 if (!multiple_operation_profitable_p (false, count, 0))
12998 rtx seq;
13000 start_sequence ();
13002 for (i = 0; i < count; i++)
13003 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13005 if (wback_offset != 0)
13006 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13008 seq = get_insns ();
13009 end_sequence ();
13011 return seq;
13014 result = gen_rtx_PARALLEL (VOIDmode,
13015 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13016 if (wback_offset != 0)
13018 XVECEXP (result, 0, 0)
13019 = gen_rtx_SET (VOIDmode, basereg,
13020 plus_constant (Pmode, basereg, wback_offset));
13021 i = 1;
13022 count++;
13025 for (j = 0; i < count; i++, j++)
13026 XVECEXP (result, 0, i)
13027 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13029 return result;
13032 /* Generate a store-multiple instruction. COUNT is the number of stores in
13033 the instruction; REGS and MEMS are arrays containing the operands.
13034 BASEREG is the base register to be used in addressing the memory operands.
13035 WBACK_OFFSET is nonzero if the instruction should update the base
13036 register. */
13038 static rtx
13039 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13040 HOST_WIDE_INT wback_offset)
13042 int i = 0, j;
13043 rtx result;
13045 if (GET_CODE (basereg) == PLUS)
13046 basereg = XEXP (basereg, 0);
13048 if (!multiple_operation_profitable_p (false, count, 0))
13050 rtx seq;
13052 start_sequence ();
13054 for (i = 0; i < count; i++)
13055 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13057 if (wback_offset != 0)
13058 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13060 seq = get_insns ();
13061 end_sequence ();
13063 return seq;
13066 result = gen_rtx_PARALLEL (VOIDmode,
13067 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13068 if (wback_offset != 0)
13070 XVECEXP (result, 0, 0)
13071 = gen_rtx_SET (VOIDmode, basereg,
13072 plus_constant (Pmode, basereg, wback_offset));
13073 i = 1;
13074 count++;
13077 for (j = 0; i < count; i++, j++)
13078 XVECEXP (result, 0, i)
13079 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13081 return result;
13084 /* Generate either a load-multiple or a store-multiple instruction. This
13085 function can be used in situations where we can start with a single MEM
13086 rtx and adjust its address upwards.
13087 COUNT is the number of operations in the instruction, not counting a
13088 possible update of the base register. REGS is an array containing the
13089 register operands.
13090 BASEREG is the base register to be used in addressing the memory operands,
13091 which are constructed from BASEMEM.
13092 WRITE_BACK specifies whether the generated instruction should include an
13093 update of the base register.
13094 OFFSETP is used to pass an offset to and from this function; this offset
13095 is not used when constructing the address (instead BASEMEM should have an
13096 appropriate offset in its address), it is used only for setting
13097 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13099 static rtx
13100 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13101 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13103 rtx mems[MAX_LDM_STM_OPS];
13104 HOST_WIDE_INT offset = *offsetp;
13105 int i;
13107 gcc_assert (count <= MAX_LDM_STM_OPS);
13109 if (GET_CODE (basereg) == PLUS)
13110 basereg = XEXP (basereg, 0);
13112 for (i = 0; i < count; i++)
13114 rtx addr = plus_constant (Pmode, basereg, i * 4);
13115 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13116 offset += 4;
13119 if (write_back)
13120 *offsetp = offset;
13122 if (is_load)
13123 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13124 write_back ? 4 * count : 0);
13125 else
13126 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13127 write_back ? 4 * count : 0);
13131 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13132 rtx basemem, HOST_WIDE_INT *offsetp)
13134 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13135 offsetp);
13139 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13140 rtx basemem, HOST_WIDE_INT *offsetp)
13142 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13143 offsetp);
13146 /* Called from a peephole2 expander to turn a sequence of loads into an
13147 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13148 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13149 is true if we can reorder the registers because they are used commutatively
13150 subsequently.
13151 Returns true iff we could generate a new instruction. */
13153 bool
13154 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13156 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13157 rtx mems[MAX_LDM_STM_OPS];
13158 int i, j, base_reg;
13159 rtx base_reg_rtx;
13160 HOST_WIDE_INT offset;
13161 int write_back = FALSE;
13162 int ldm_case;
13163 rtx addr;
13165 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13166 &base_reg, &offset, !sort_regs);
13168 if (ldm_case == 0)
13169 return false;
13171 if (sort_regs)
13172 for (i = 0; i < nops - 1; i++)
13173 for (j = i + 1; j < nops; j++)
13174 if (regs[i] > regs[j])
13176 int t = regs[i];
13177 regs[i] = regs[j];
13178 regs[j] = t;
13180 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13182 if (TARGET_THUMB1)
13184 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13185 gcc_assert (ldm_case == 1 || ldm_case == 5);
13186 write_back = TRUE;
13189 if (ldm_case == 5)
13191 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13192 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13193 offset = 0;
13194 if (!TARGET_THUMB1)
13196 base_reg = regs[0];
13197 base_reg_rtx = newbase;
13201 for (i = 0; i < nops; i++)
13203 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13204 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13205 SImode, addr, 0);
13207 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13208 write_back ? offset + i * 4 : 0));
13209 return true;
13212 /* Called from a peephole2 expander to turn a sequence of stores into an
13213 STM instruction. OPERANDS are the operands found by the peephole matcher;
13214 NOPS indicates how many separate stores we are trying to combine.
13215 Returns true iff we could generate a new instruction. */
13217 bool
13218 gen_stm_seq (rtx *operands, int nops)
13220 int i;
13221 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13222 rtx mems[MAX_LDM_STM_OPS];
13223 int base_reg;
13224 rtx base_reg_rtx;
13225 HOST_WIDE_INT offset;
13226 int write_back = FALSE;
13227 int stm_case;
13228 rtx addr;
13229 bool base_reg_dies;
13231 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13232 mem_order, &base_reg, &offset, true);
13234 if (stm_case == 0)
13235 return false;
13237 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13239 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13240 if (TARGET_THUMB1)
13242 gcc_assert (base_reg_dies);
13243 write_back = TRUE;
13246 if (stm_case == 5)
13248 gcc_assert (base_reg_dies);
13249 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13250 offset = 0;
13253 addr = plus_constant (Pmode, base_reg_rtx, offset);
13255 for (i = 0; i < nops; i++)
13257 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13258 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13259 SImode, addr, 0);
13261 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13262 write_back ? offset + i * 4 : 0));
13263 return true;
13266 /* Called from a peephole2 expander to turn a sequence of stores that are
13267 preceded by constant loads into an STM instruction. OPERANDS are the
13268 operands found by the peephole matcher; NOPS indicates how many
13269 separate stores we are trying to combine; there are 2 * NOPS
13270 instructions in the peephole.
13271 Returns true iff we could generate a new instruction. */
13273 bool
13274 gen_const_stm_seq (rtx *operands, int nops)
13276 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13277 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13278 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13279 rtx mems[MAX_LDM_STM_OPS];
13280 int base_reg;
13281 rtx base_reg_rtx;
13282 HOST_WIDE_INT offset;
13283 int write_back = FALSE;
13284 int stm_case;
13285 rtx addr;
13286 bool base_reg_dies;
13287 int i, j;
13288 HARD_REG_SET allocated;
13290 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13291 mem_order, &base_reg, &offset, false);
13293 if (stm_case == 0)
13294 return false;
13296 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13298 /* If the same register is used more than once, try to find a free
13299 register. */
13300 CLEAR_HARD_REG_SET (allocated);
13301 for (i = 0; i < nops; i++)
13303 for (j = i + 1; j < nops; j++)
13304 if (regs[i] == regs[j])
13306 rtx t = peep2_find_free_register (0, nops * 2,
13307 TARGET_THUMB1 ? "l" : "r",
13308 SImode, &allocated);
13309 if (t == NULL_RTX)
13310 return false;
13311 reg_rtxs[i] = t;
13312 regs[i] = REGNO (t);
13316 /* Compute an ordering that maps the register numbers to an ascending
13317 sequence. */
13318 reg_order[0] = 0;
13319 for (i = 0; i < nops; i++)
13320 if (regs[i] < regs[reg_order[0]])
13321 reg_order[0] = i;
13323 for (i = 1; i < nops; i++)
13325 int this_order = reg_order[i - 1];
13326 for (j = 0; j < nops; j++)
13327 if (regs[j] > regs[reg_order[i - 1]]
13328 && (this_order == reg_order[i - 1]
13329 || regs[j] < regs[this_order]))
13330 this_order = j;
13331 reg_order[i] = this_order;
13334 /* Ensure that registers that must be live after the instruction end
13335 up with the correct value. */
13336 for (i = 0; i < nops; i++)
13338 int this_order = reg_order[i];
13339 if ((this_order != mem_order[i]
13340 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13341 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13342 return false;
13345 /* Load the constants. */
13346 for (i = 0; i < nops; i++)
13348 rtx op = operands[2 * nops + mem_order[i]];
13349 sorted_regs[i] = regs[reg_order[i]];
13350 emit_move_insn (reg_rtxs[reg_order[i]], op);
13353 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13355 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13356 if (TARGET_THUMB1)
13358 gcc_assert (base_reg_dies);
13359 write_back = TRUE;
13362 if (stm_case == 5)
13364 gcc_assert (base_reg_dies);
13365 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13366 offset = 0;
13369 addr = plus_constant (Pmode, base_reg_rtx, offset);
13371 for (i = 0; i < nops; i++)
13373 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13374 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13375 SImode, addr, 0);
13377 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13378 write_back ? offset + i * 4 : 0));
13379 return true;
13382 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13383 unaligned copies on processors which support unaligned semantics for those
13384 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13385 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13386 An interleave factor of 1 (the minimum) will perform no interleaving.
13387 Load/store multiple are used for aligned addresses where possible. */
13389 static void
13390 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13391 HOST_WIDE_INT length,
13392 unsigned int interleave_factor)
13394 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13395 int *regnos = XALLOCAVEC (int, interleave_factor);
13396 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13397 HOST_WIDE_INT i, j;
13398 HOST_WIDE_INT remaining = length, words;
13399 rtx halfword_tmp = NULL, byte_tmp = NULL;
13400 rtx dst, src;
13401 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13402 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13403 HOST_WIDE_INT srcoffset, dstoffset;
13404 HOST_WIDE_INT src_autoinc, dst_autoinc;
13405 rtx mem, addr;
13407 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13409 /* Use hard registers if we have aligned source or destination so we can use
13410 load/store multiple with contiguous registers. */
13411 if (dst_aligned || src_aligned)
13412 for (i = 0; i < interleave_factor; i++)
13413 regs[i] = gen_rtx_REG (SImode, i);
13414 else
13415 for (i = 0; i < interleave_factor; i++)
13416 regs[i] = gen_reg_rtx (SImode);
13418 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13419 src = copy_addr_to_reg (XEXP (srcbase, 0));
13421 srcoffset = dstoffset = 0;
13423 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13424 For copying the last bytes we want to subtract this offset again. */
13425 src_autoinc = dst_autoinc = 0;
13427 for (i = 0; i < interleave_factor; i++)
13428 regnos[i] = i;
13430 /* Copy BLOCK_SIZE_BYTES chunks. */
13432 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13434 /* Load words. */
13435 if (src_aligned && interleave_factor > 1)
13437 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13438 TRUE, srcbase, &srcoffset));
13439 src_autoinc += UNITS_PER_WORD * interleave_factor;
13441 else
13443 for (j = 0; j < interleave_factor; j++)
13445 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13446 - src_autoinc));
13447 mem = adjust_automodify_address (srcbase, SImode, addr,
13448 srcoffset + j * UNITS_PER_WORD);
13449 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13451 srcoffset += block_size_bytes;
13454 /* Store words. */
13455 if (dst_aligned && interleave_factor > 1)
13457 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13458 TRUE, dstbase, &dstoffset));
13459 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13461 else
13463 for (j = 0; j < interleave_factor; j++)
13465 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13466 - dst_autoinc));
13467 mem = adjust_automodify_address (dstbase, SImode, addr,
13468 dstoffset + j * UNITS_PER_WORD);
13469 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13471 dstoffset += block_size_bytes;
13474 remaining -= block_size_bytes;
13477 /* Copy any whole words left (note these aren't interleaved with any
13478 subsequent halfword/byte load/stores in the interests of simplicity). */
13480 words = remaining / UNITS_PER_WORD;
13482 gcc_assert (words < interleave_factor);
13484 if (src_aligned && words > 1)
13486 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13487 &srcoffset));
13488 src_autoinc += UNITS_PER_WORD * words;
13490 else
13492 for (j = 0; j < words; j++)
13494 addr = plus_constant (Pmode, src,
13495 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13496 mem = adjust_automodify_address (srcbase, SImode, addr,
13497 srcoffset + j * UNITS_PER_WORD);
13498 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13500 srcoffset += words * UNITS_PER_WORD;
13503 if (dst_aligned && words > 1)
13505 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13506 &dstoffset));
13507 dst_autoinc += words * UNITS_PER_WORD;
13509 else
13511 for (j = 0; j < words; j++)
13513 addr = plus_constant (Pmode, dst,
13514 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13515 mem = adjust_automodify_address (dstbase, SImode, addr,
13516 dstoffset + j * UNITS_PER_WORD);
13517 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13519 dstoffset += words * UNITS_PER_WORD;
13522 remaining -= words * UNITS_PER_WORD;
13524 gcc_assert (remaining < 4);
13526 /* Copy a halfword if necessary. */
13528 if (remaining >= 2)
13530 halfword_tmp = gen_reg_rtx (SImode);
13532 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13533 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13534 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13536 /* Either write out immediately, or delay until we've loaded the last
13537 byte, depending on interleave factor. */
13538 if (interleave_factor == 1)
13540 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13541 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13542 emit_insn (gen_unaligned_storehi (mem,
13543 gen_lowpart (HImode, halfword_tmp)));
13544 halfword_tmp = NULL;
13545 dstoffset += 2;
13548 remaining -= 2;
13549 srcoffset += 2;
13552 gcc_assert (remaining < 2);
13554 /* Copy last byte. */
13556 if ((remaining & 1) != 0)
13558 byte_tmp = gen_reg_rtx (SImode);
13560 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13561 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13562 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13564 if (interleave_factor == 1)
13566 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13567 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13568 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13569 byte_tmp = NULL;
13570 dstoffset++;
13573 remaining--;
13574 srcoffset++;
13577 /* Store last halfword if we haven't done so already. */
13579 if (halfword_tmp)
13581 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13582 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13583 emit_insn (gen_unaligned_storehi (mem,
13584 gen_lowpart (HImode, halfword_tmp)));
13585 dstoffset += 2;
13588 /* Likewise for last byte. */
13590 if (byte_tmp)
13592 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13593 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13594 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13595 dstoffset++;
13598 gcc_assert (remaining == 0 && srcoffset == dstoffset);
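/* As an illustration of the decomposition above: with interleave_factor == 4
   (and therefore 16-byte blocks) a 23-byte copy is emitted as one 16-byte
   block, one whole word, one halfword and one trailing byte.  Because the
   interleave factor is greater than 1, the final halfword and byte stores
   are deferred until after their loads, as handled by the "store last
   halfword/byte" code above.  */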
13601 /* From mips_adjust_block_mem:
13603 Helper function for doing a loop-based block operation on memory
13604 reference MEM. Each iteration of the loop will operate on LENGTH
13605 bytes of MEM.
13607 Create a new base register for use within the loop and point it to
13608 the start of MEM. Create a new memory reference that uses this
13609 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13611 static void
13612 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13613 rtx *loop_mem)
13615 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13617 /* Although the new mem does not refer to a known location,
13618 it does keep up to LENGTH bytes of alignment. */
13619 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13620 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13623 /* From mips_block_move_loop:
13625 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13626 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13627 the memory regions do not overlap. */
13629 static void
13630 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13631 unsigned int interleave_factor,
13632 HOST_WIDE_INT bytes_per_iter)
13634 rtx label, src_reg, dest_reg, final_src, test;
13635 HOST_WIDE_INT leftover;
13637 leftover = length % bytes_per_iter;
13638 length -= leftover;
13640 /* Create registers and memory references for use within the loop. */
13641 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13642 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13644 /* Calculate the value that SRC_REG should have after the last iteration of
13645 the loop. */
13646 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13647 0, 0, OPTAB_WIDEN);
13649 /* Emit the start of the loop. */
13650 label = gen_label_rtx ();
13651 emit_label (label);
13653 /* Emit the loop body. */
13654 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13655 interleave_factor);
13657 /* Move on to the next block. */
13658 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13659 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13661 /* Emit the loop condition. */
13662 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13663 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13665 /* Mop up any left-over bytes. */
13666 if (leftover)
13667 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
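/* For example, a 100-byte copy with bytes_per_iter == 16 runs the loop for
   96 bytes (six iterations) and then calls
   arm_block_move_unaligned_straight for the remaining 4 bytes.  */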
13670 /* Emit a block move when either the source or destination is unaligned (not
13671 aligned to a four-byte boundary). This may need further tuning depending on
13672 core type, optimize_size setting, etc. */
13674 static int
13675 arm_movmemqi_unaligned (rtx *operands)
13677 HOST_WIDE_INT length = INTVAL (operands[2]);
13679 if (optimize_size)
13681 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
13682 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
13683 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13684 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13685 or dst_aligned though: allow more interleaving in those cases since the
13686 resulting code can be smaller. */
13687 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
13688 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
13690 if (length > 12)
13691 arm_block_move_unaligned_loop (operands[0], operands[1], length,
13692 interleave_factor, bytes_per_iter);
13693 else
13694 arm_block_move_unaligned_straight (operands[0], operands[1], length,
13695 interleave_factor);
13697 else
13699 /* Note that the loop created by arm_block_move_unaligned_loop may be
13700 subject to loop unrolling, which makes tuning this condition a little
13701 redundant. */
13702 if (length > 32)
13703 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
13704 else
13705 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
13708 return 1;
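/* To illustrate the dispatch above: when not optimizing for size, a 40-byte
   copy uses the loop with an interleave factor of 4 and 16 bytes per
   iteration, while a 24-byte copy is emitted as a single straight sequence.
   When optimizing for size with neither buffer word-aligned, the interleave
   factor drops to 1 and the loop is only used for copies longer than 12
   bytes.  */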
13712 arm_gen_movmemqi (rtx *operands)
13714 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
13715 HOST_WIDE_INT srcoffset, dstoffset;
13716 int i;
13717 rtx src, dst, srcbase, dstbase;
13718 rtx part_bytes_reg = NULL;
13719 rtx mem;
13721 if (!CONST_INT_P (operands[2])
13722 || !CONST_INT_P (operands[3])
13723 || INTVAL (operands[2]) > 64)
13724 return 0;
13726 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
13727 return arm_movmemqi_unaligned (operands);
13729 if (INTVAL (operands[3]) & 3)
13730 return 0;
13732 dstbase = operands[0];
13733 srcbase = operands[1];
13735 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
13736 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
13738 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
13739 out_words_to_go = INTVAL (operands[2]) / 4;
13740 last_bytes = INTVAL (operands[2]) & 3;
13741 dstoffset = srcoffset = 0;
13743 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
13744 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
13746 for (i = 0; in_words_to_go >= 2; i+=4)
13748 if (in_words_to_go > 4)
13749 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
13750 TRUE, srcbase, &srcoffset));
13751 else
13752 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
13753 src, FALSE, srcbase,
13754 &srcoffset));
13756 if (out_words_to_go)
13758 if (out_words_to_go > 4)
13759 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
13760 TRUE, dstbase, &dstoffset));
13761 else if (out_words_to_go != 1)
13762 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
13763 out_words_to_go, dst,
13764 (last_bytes == 0
13765 ? FALSE : TRUE),
13766 dstbase, &dstoffset));
13767 else
13769 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13770 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
13771 if (last_bytes != 0)
13773 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
13774 dstoffset += 4;
13779 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
13780 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
13783 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13784 if (out_words_to_go)
13786 rtx sreg;
13788 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13789 sreg = copy_to_reg (mem);
13791 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13792 emit_move_insn (mem, sreg);
13793 in_words_to_go--;
13795 gcc_assert (!in_words_to_go); /* Sanity check */
13798 if (in_words_to_go)
13800 gcc_assert (in_words_to_go > 0);
13802 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
13803 part_bytes_reg = copy_to_mode_reg (SImode, mem);
13806 gcc_assert (!last_bytes || part_bytes_reg);
13808 if (BYTES_BIG_ENDIAN && last_bytes)
13810 rtx tmp = gen_reg_rtx (SImode);
13812 /* The bytes we want are in the top end of the word. */
13813 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
13814 GEN_INT (8 * (4 - last_bytes))));
13815 part_bytes_reg = tmp;
13817 while (last_bytes)
13819 mem = adjust_automodify_address (dstbase, QImode,
13820 plus_constant (Pmode, dst,
13821 last_bytes - 1),
13822 dstoffset + last_bytes - 1);
13823 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13825 if (--last_bytes)
13827 tmp = gen_reg_rtx (SImode);
13828 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
13829 part_bytes_reg = tmp;
13834 else
13836 if (last_bytes > 1)
13838 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
13839 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
13840 last_bytes -= 2;
13841 if (last_bytes)
13843 rtx tmp = gen_reg_rtx (SImode);
13844 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
13845 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
13846 part_bytes_reg = tmp;
13847 dstoffset += 2;
13851 if (last_bytes)
13853 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
13854 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
13858 return 1;
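/* A worked example of the code above: a word-aligned 22-byte copy gives
   in_words_to_go == 6, out_words_to_go == 5 and last_bytes == 2.  The first
   loop iteration copies four words with LDM/STM, the second loads the
   remaining two words but stores only one, and the trailing two bytes are
   then written out from part_bytes_reg (r1 in this case).  */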
13861 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13862 by mode size. */
13863 inline static rtx
13864 next_consecutive_mem (rtx mem)
13866 enum machine_mode mode = GET_MODE (mem);
13867 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
13868 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
13870 return adjust_automodify_address (mem, mode, addr, offset);
13873 /* Copy using LDRD/STRD instructions whenever possible.
13874 Returns true upon success. */
13875 bool
13876 gen_movmem_ldrd_strd (rtx *operands)
13878 unsigned HOST_WIDE_INT len;
13879 HOST_WIDE_INT align;
13880 rtx src, dst, base;
13881 rtx reg0;
13882 bool src_aligned, dst_aligned;
13883 bool src_volatile, dst_volatile;
13885 gcc_assert (CONST_INT_P (operands[2]));
13886 gcc_assert (CONST_INT_P (operands[3]));
13888 len = UINTVAL (operands[2]);
13889 if (len > 64)
13890 return false;
13892 /* Maximum alignment we can assume for both src and dst buffers. */
13893 align = INTVAL (operands[3]);
13895 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
13896 return false;
13898 /* Place src and dst addresses in registers
13899 and update the corresponding mem rtx. */
13900 dst = operands[0];
13901 dst_volatile = MEM_VOLATILE_P (dst);
13902 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
13903 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
13904 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13906 src = operands[1];
13907 src_volatile = MEM_VOLATILE_P (src);
13908 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
13909 base = copy_to_mode_reg (SImode, XEXP (src, 0));
13910 src = adjust_automodify_address (src, VOIDmode, base, 0);
13912 if (!unaligned_access && !(src_aligned && dst_aligned))
13913 return false;
13915 if (src_volatile || dst_volatile)
13916 return false;
13918 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13919 if (!(dst_aligned || src_aligned))
13920 return arm_gen_movmemqi (operands);
13922 src = adjust_address (src, DImode, 0);
13923 dst = adjust_address (dst, DImode, 0);
13924 while (len >= 8)
13926 len -= 8;
13927 reg0 = gen_reg_rtx (DImode);
13928 if (src_aligned)
13929 emit_move_insn (reg0, src);
13930 else
13931 emit_insn (gen_unaligned_loaddi (reg0, src));
13933 if (dst_aligned)
13934 emit_move_insn (dst, reg0);
13935 else
13936 emit_insn (gen_unaligned_storedi (dst, reg0));
13938 src = next_consecutive_mem (src);
13939 dst = next_consecutive_mem (dst);
13942 gcc_assert (len < 8);
13943 if (len >= 4)
13945 /* More than a word but less than a double-word to copy. Copy a word. */
13946 reg0 = gen_reg_rtx (SImode);
13947 src = adjust_address (src, SImode, 0);
13948 dst = adjust_address (dst, SImode, 0);
13949 if (src_aligned)
13950 emit_move_insn (reg0, src);
13951 else
13952 emit_insn (gen_unaligned_loadsi (reg0, src));
13954 if (dst_aligned)
13955 emit_move_insn (dst, reg0);
13956 else
13957 emit_insn (gen_unaligned_storesi (dst, reg0));
13959 src = next_consecutive_mem (src);
13960 dst = next_consecutive_mem (dst);
13961 len -= 4;
13964 if (len == 0)
13965 return true;
13967 /* Copy the remaining bytes. */
13968 if (len >= 2)
13970 dst = adjust_address (dst, HImode, 0);
13971 src = adjust_address (src, HImode, 0);
13972 reg0 = gen_reg_rtx (SImode);
13973 if (src_aligned)
13974 emit_insn (gen_zero_extendhisi2 (reg0, src));
13975 else
13976 emit_insn (gen_unaligned_loadhiu (reg0, src));
13978 if (dst_aligned)
13979 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
13980 else
13981 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
13983 src = next_consecutive_mem (src);
13984 dst = next_consecutive_mem (dst);
13985 if (len == 2)
13986 return true;
13989 dst = adjust_address (dst, QImode, 0);
13990 src = adjust_address (src, QImode, 0);
13991 reg0 = gen_reg_rtx (QImode);
13992 emit_move_insn (reg0, src);
13993 emit_move_insn (dst, reg0);
13994 return true;
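/* As an example of the sequence above, a 15-byte copy (with at least one
   side word-aligned, or unaligned access enabled, so that we do not fall
   back to arm_gen_movmemqi) is emitted as one doubleword access, one word,
   one halfword and one final byte: 8 + 4 + 2 + 1.  */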
13997 /* Select a dominance comparison mode if possible for a test of the general
13998 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13999 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14000 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14001 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14002 In all cases OP will be either EQ or NE, but we don't need to know which
14003 here. If we are unable to support a dominance comparison we return
14004 CCmode. This will then fail to match for the RTL expressions that
14005 generate this call. */
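/* An illustrative pair of cases for the mapping implemented below, assuming
   both operand comparisons are simple enough to select plain CCmode on
   their own: for (eq a b) || (le c d) with DOM_CC_X_OR_Y, EQ dominates LE
   and the result is CC_DLEmode; for (eq a b) && (eq c d) with
   DOM_CC_X_AND_Y the result is CC_DEQmode.  */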
14006 enum machine_mode
14007 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14009 enum rtx_code cond1, cond2;
14010 int swapped = 0;
14012 /* Currently we will probably get the wrong result if the individual
14013 comparisons are not simple. This also ensures that it is safe to
14014 reverse a comparison if necessary. */
14015 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14016 != CCmode)
14017 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14018 != CCmode))
14019 return CCmode;
14021 /* The if_then_else variant of this tests the second condition if the
14022 first passes, but is true if the first fails. Reverse the first
14023 condition to get a true "inclusive-or" expression. */
14024 if (cond_or == DOM_CC_NX_OR_Y)
14025 cond1 = reverse_condition (cond1);
14027 /* If the comparisons are not equal, and one doesn't dominate the other,
14028 then we can't do this. */
14029 if (cond1 != cond2
14030 && !comparison_dominates_p (cond1, cond2)
14031 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14032 return CCmode;
14034 if (swapped)
14036 enum rtx_code temp = cond1;
14037 cond1 = cond2;
14038 cond2 = temp;
14041 switch (cond1)
14043 case EQ:
14044 if (cond_or == DOM_CC_X_AND_Y)
14045 return CC_DEQmode;
14047 switch (cond2)
14049 case EQ: return CC_DEQmode;
14050 case LE: return CC_DLEmode;
14051 case LEU: return CC_DLEUmode;
14052 case GE: return CC_DGEmode;
14053 case GEU: return CC_DGEUmode;
14054 default: gcc_unreachable ();
14057 case LT:
14058 if (cond_or == DOM_CC_X_AND_Y)
14059 return CC_DLTmode;
14061 switch (cond2)
14063 case LT:
14064 return CC_DLTmode;
14065 case LE:
14066 return CC_DLEmode;
14067 case NE:
14068 return CC_DNEmode;
14069 default:
14070 gcc_unreachable ();
14073 case GT:
14074 if (cond_or == DOM_CC_X_AND_Y)
14075 return CC_DGTmode;
14077 switch (cond2)
14079 case GT:
14080 return CC_DGTmode;
14081 case GE:
14082 return CC_DGEmode;
14083 case NE:
14084 return CC_DNEmode;
14085 default:
14086 gcc_unreachable ();
14089 case LTU:
14090 if (cond_or == DOM_CC_X_AND_Y)
14091 return CC_DLTUmode;
14093 switch (cond2)
14095 case LTU:
14096 return CC_DLTUmode;
14097 case LEU:
14098 return CC_DLEUmode;
14099 case NE:
14100 return CC_DNEmode;
14101 default:
14102 gcc_unreachable ();
14105 case GTU:
14106 if (cond_or == DOM_CC_X_AND_Y)
14107 return CC_DGTUmode;
14109 switch (cond2)
14111 case GTU:
14112 return CC_DGTUmode;
14113 case GEU:
14114 return CC_DGEUmode;
14115 case NE:
14116 return CC_DNEmode;
14117 default:
14118 gcc_unreachable ();
14121 /* The remaining cases only occur when both comparisons are the
14122 same. */
14123 case NE:
14124 gcc_assert (cond1 == cond2);
14125 return CC_DNEmode;
14127 case LE:
14128 gcc_assert (cond1 == cond2);
14129 return CC_DLEmode;
14131 case GE:
14132 gcc_assert (cond1 == cond2);
14133 return CC_DGEmode;
14135 case LEU:
14136 gcc_assert (cond1 == cond2);
14137 return CC_DLEUmode;
14139 case GEU:
14140 gcc_assert (cond1 == cond2);
14141 return CC_DGEUmode;
14143 default:
14144 gcc_unreachable ();
14148 enum machine_mode
14149 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14151 /* All floating point compares return CCFP if it is an equality
14152 comparison, and CCFPE otherwise. */
14153 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14155 switch (op)
14157 case EQ:
14158 case NE:
14159 case UNORDERED:
14160 case ORDERED:
14161 case UNLT:
14162 case UNLE:
14163 case UNGT:
14164 case UNGE:
14165 case UNEQ:
14166 case LTGT:
14167 return CCFPmode;
14169 case LT:
14170 case LE:
14171 case GT:
14172 case GE:
14173 return CCFPEmode;
14175 default:
14176 gcc_unreachable ();
14180 /* A compare with a shifted operand. Because of canonicalization, the
14181 comparison will have to be swapped when we emit the assembler. */
14182 if (GET_MODE (y) == SImode
14183 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14184 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14185 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14186 || GET_CODE (x) == ROTATERT))
14187 return CC_SWPmode;
14189 /* This operation is performed swapped, but since we only rely on the Z
14190 flag we don't need an additional mode. */
14191 if (GET_MODE (y) == SImode
14192 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14193 && GET_CODE (x) == NEG
14194 && (op == EQ || op == NE))
14195 return CC_Zmode;
14197 /* This is a special case that is used by combine to allow a
14198 comparison of a shifted byte load to be split into a zero-extend
14199 followed by a comparison of the shifted integer (only valid for
14200 equalities and unsigned inequalities). */
14201 if (GET_MODE (x) == SImode
14202 && GET_CODE (x) == ASHIFT
14203 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14204 && GET_CODE (XEXP (x, 0)) == SUBREG
14205 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14206 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14207 && (op == EQ || op == NE
14208 || op == GEU || op == GTU || op == LTU || op == LEU)
14209 && CONST_INT_P (y))
14210 return CC_Zmode;
14212 /* A construct for a conditional compare, if the false arm contains
14213 0, then both conditions must be true, otherwise either condition
14214 must be true. Not all conditions are possible, so CCmode is
14215 returned if it can't be done. */
14216 if (GET_CODE (x) == IF_THEN_ELSE
14217 && (XEXP (x, 2) == const0_rtx
14218 || XEXP (x, 2) == const1_rtx)
14219 && COMPARISON_P (XEXP (x, 0))
14220 && COMPARISON_P (XEXP (x, 1)))
14221 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14222 INTVAL (XEXP (x, 2)));
14224 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14225 if (GET_CODE (x) == AND
14226 && (op == EQ || op == NE)
14227 && COMPARISON_P (XEXP (x, 0))
14228 && COMPARISON_P (XEXP (x, 1)))
14229 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14230 DOM_CC_X_AND_Y);
14232 if (GET_CODE (x) == IOR
14233 && (op == EQ || op == NE)
14234 && COMPARISON_P (XEXP (x, 0))
14235 && COMPARISON_P (XEXP (x, 1)))
14236 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14237 DOM_CC_X_OR_Y);
14239 /* An operation (on Thumb) where we want to test for a single bit.
14240 This is done by shifting that bit up into the top bit of a
14241 scratch register; we can then branch on the sign bit. */
14242 if (TARGET_THUMB1
14243 && GET_MODE (x) == SImode
14244 && (op == EQ || op == NE)
14245 && GET_CODE (x) == ZERO_EXTRACT
14246 && XEXP (x, 1) == const1_rtx)
14247 return CC_Nmode;
14249 /* For an operation that sets the condition codes as a side-effect, the
14250 V flag is not set correctly, so we can only use comparisons where
14251 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14252 instead.) */
14253 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14254 if (GET_MODE (x) == SImode
14255 && y == const0_rtx
14256 && (op == EQ || op == NE || op == LT || op == GE)
14257 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14258 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14259 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14260 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14261 || GET_CODE (x) == LSHIFTRT
14262 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14263 || GET_CODE (x) == ROTATERT
14264 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14265 return CC_NOOVmode;
14267 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14268 return CC_Zmode;
14270 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14271 && GET_CODE (x) == PLUS
14272 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14273 return CC_Cmode;
14275 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14277 switch (op)
14279 case EQ:
14280 case NE:
14281 /* A DImode comparison against zero can be implemented by
14282 or'ing the two halves together. */
14283 if (y == const0_rtx)
14284 return CC_Zmode;
14286 /* We can do an equality test in three Thumb instructions. */
14287 if (!TARGET_32BIT)
14288 return CC_Zmode;
14290 /* FALLTHROUGH */
14292 case LTU:
14293 case LEU:
14294 case GTU:
14295 case GEU:
14296 /* DImode unsigned comparisons can be implemented by cmp +
14297 cmpeq without a scratch register. Not worth doing in
14298 Thumb-2. */
14299 if (TARGET_32BIT)
14300 return CC_CZmode;
14302 /* FALLTHROUGH */
14304 case LT:
14305 case LE:
14306 case GT:
14307 case GE:
14308 /* DImode signed and unsigned comparisons can be implemented
14309 by cmp + sbcs with a scratch register, but that does not
14310 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14311 gcc_assert (op != EQ && op != NE);
14312 return CC_NCVmode;
14314 default:
14315 gcc_unreachable ();
14319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14320 return GET_MODE (x);
14322 return CCmode;
14325 /* X and Y are two things to compare using CODE. Emit the compare insn and
14326 return the rtx for register 0 in the proper mode. FP means this is a
14327 floating point compare: I don't think that it is needed on the arm. */
14329 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14331 enum machine_mode mode;
14332 rtx cc_reg;
14333 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14335 /* We might have X as a constant, Y as a register because of the predicates
14336 used for cmpdi. If so, force X to a register here. */
14337 if (dimode_comparison && !REG_P (x))
14338 x = force_reg (DImode, x);
14340 mode = SELECT_CC_MODE (code, x, y);
14341 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14343 if (dimode_comparison
14344 && mode != CC_CZmode)
14346 rtx clobber, set;
14348 /* To compare two non-zero values for equality, XOR them and
14349 then compare against zero. Not used for ARM mode; there
14350 CC_CZmode is cheaper. */
14351 if (mode == CC_Zmode && y != const0_rtx)
14353 gcc_assert (!reload_completed);
14354 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14355 y = const0_rtx;
14358 /* A scratch register is required. */
14359 if (reload_completed)
14360 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14361 else
14362 scratch = gen_rtx_SCRATCH (SImode);
14364 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14365 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14366 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14368 else
14369 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14371 return cc_reg;
14374 /* Generate a sequence of insns that will generate the correct return
14375 address mask depending on the physical architecture that the program
14376 is running on. */
14378 arm_gen_return_addr_mask (void)
14380 rtx reg = gen_reg_rtx (Pmode);
14382 emit_insn (gen_return_addr_mask (reg));
14383 return reg;
14386 void
14387 arm_reload_in_hi (rtx *operands)
14389 rtx ref = operands[1];
14390 rtx base, scratch;
14391 HOST_WIDE_INT offset = 0;
14393 if (GET_CODE (ref) == SUBREG)
14395 offset = SUBREG_BYTE (ref);
14396 ref = SUBREG_REG (ref);
14399 if (REG_P (ref))
14401 /* We have a pseudo which has been spilt onto the stack; there
14402 are two cases here: the first where there is a simple
14403 stack-slot replacement and a second where the stack-slot is
14404 out of range, or is used as a subreg. */
14405 if (reg_equiv_mem (REGNO (ref)))
14407 ref = reg_equiv_mem (REGNO (ref));
14408 base = find_replacement (&XEXP (ref, 0));
14410 else
14411 /* The slot is out of range, or was dressed up in a SUBREG. */
14412 base = reg_equiv_address (REGNO (ref));
14414 else
14415 base = find_replacement (&XEXP (ref, 0));
14417 /* Handle the case where the address is too complex to be offset by 1. */
14418 if (GET_CODE (base) == MINUS
14419 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14421 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14423 emit_set_insn (base_plus, base);
14424 base = base_plus;
14426 else if (GET_CODE (base) == PLUS)
14428 /* The addend must be CONST_INT, or we would have dealt with it above. */
14429 HOST_WIDE_INT hi, lo;
14431 offset += INTVAL (XEXP (base, 1));
14432 base = XEXP (base, 0);
14434 /* Rework the address into a legal sequence of insns. */
14435 /* Valid range for lo is -4095 -> 4095 */
14436 lo = (offset >= 0
14437 ? (offset & 0xfff)
14438 : -((-offset) & 0xfff));
14440 /* Corner case, if lo is the max offset then we would be out of range
14441 once we have added the additional 1 below, so bump the msb into the
14442 pre-loading insn(s). */
14443 if (lo == 4095)
14444 lo &= 0x7ff;
14446 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14447 ^ (HOST_WIDE_INT) 0x80000000)
14448 - (HOST_WIDE_INT) 0x80000000);
14450 gcc_assert (hi + lo == offset);
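      /* Illustrative values for the split above: offset 0x2345 gives
	 lo = 0x345 and hi = 0x2000; offset -10 gives lo = -10 and hi = 0;
	 the corner case offset 4095 is split as lo = 2047 and hi = 2048 so
	 that the "offset + 1" access below still stays in range.  */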
14452 if (hi != 0)
14454 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14456 /* Get the base address; addsi3 knows how to handle constants
14457 that require more than one insn. */
14458 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14459 base = base_plus;
14460 offset = lo;
14464 /* Operands[2] may overlap operands[0] (though it won't overlap
14465 operands[1]), that's why we asked for a DImode reg -- so we can
14466 use the bit that does not overlap. */
14467 if (REGNO (operands[2]) == REGNO (operands[0]))
14468 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14469 else
14470 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14472 emit_insn (gen_zero_extendqisi2 (scratch,
14473 gen_rtx_MEM (QImode,
14474 plus_constant (Pmode, base,
14475 offset))));
14476 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14477 gen_rtx_MEM (QImode,
14478 plus_constant (Pmode, base,
14479 offset + 1))));
14480 if (!BYTES_BIG_ENDIAN)
14481 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14482 gen_rtx_IOR (SImode,
14483 gen_rtx_ASHIFT
14484 (SImode,
14485 gen_rtx_SUBREG (SImode, operands[0], 0),
14486 GEN_INT (8)),
14487 scratch));
14488 else
14489 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14490 gen_rtx_IOR (SImode,
14491 gen_rtx_ASHIFT (SImode, scratch,
14492 GEN_INT (8)),
14493 gen_rtx_SUBREG (SImode, operands[0], 0)));
14496 /* Handle storing a half-word to memory during reload by synthesizing it as two
14497 byte stores. Take care not to clobber the input values until after we
14498 have moved them somewhere safe. This code assumes that if the DImode
14499 scratch in operands[2] overlaps either the input value or output address
14500 in some way, then that value must die in this insn (we absolutely need
14501 two scratch registers for some corner cases). */
14502 void
14503 arm_reload_out_hi (rtx *operands)
14505 rtx ref = operands[0];
14506 rtx outval = operands[1];
14507 rtx base, scratch;
14508 HOST_WIDE_INT offset = 0;
14510 if (GET_CODE (ref) == SUBREG)
14512 offset = SUBREG_BYTE (ref);
14513 ref = SUBREG_REG (ref);
14516 if (REG_P (ref))
14518 /* We have a pseudo which has been spilt onto the stack; there
14519 are two cases here: the first where there is a simple
14520 stack-slot replacement and a second where the stack-slot is
14521 out of range, or is used as a subreg. */
14522 if (reg_equiv_mem (REGNO (ref)))
14524 ref = reg_equiv_mem (REGNO (ref));
14525 base = find_replacement (&XEXP (ref, 0));
14527 else
14528 /* The slot is out of range, or was dressed up in a SUBREG. */
14529 base = reg_equiv_address (REGNO (ref));
14531 else
14532 base = find_replacement (&XEXP (ref, 0));
14534 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14536 /* Handle the case where the address is too complex to be offset by 1. */
14537 if (GET_CODE (base) == MINUS
14538 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14540 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14542 /* Be careful not to destroy OUTVAL. */
14543 if (reg_overlap_mentioned_p (base_plus, outval))
14545 /* Updating base_plus might destroy outval, see if we can
14546 swap the scratch and base_plus. */
14547 if (!reg_overlap_mentioned_p (scratch, outval))
14549 rtx tmp = scratch;
14550 scratch = base_plus;
14551 base_plus = tmp;
14553 else
14555 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14557 /* Be conservative and copy OUTVAL into the scratch now,
14558 this should only be necessary if outval is a subreg
14559 of something larger than a word. */
14560 /* XXX Might this clobber base? I can't see how it can,
14561 since scratch is known to overlap with OUTVAL, and
14562 must be wider than a word. */
14563 emit_insn (gen_movhi (scratch_hi, outval));
14564 outval = scratch_hi;
14568 emit_set_insn (base_plus, base);
14569 base = base_plus;
14571 else if (GET_CODE (base) == PLUS)
14573 /* The addend must be CONST_INT, or we would have dealt with it above. */
14574 HOST_WIDE_INT hi, lo;
14576 offset += INTVAL (XEXP (base, 1));
14577 base = XEXP (base, 0);
14579 /* Rework the address into a legal sequence of insns. */
14580 /* Valid range for lo is -4095 -> 4095 */
14581 lo = (offset >= 0
14582 ? (offset & 0xfff)
14583 : -((-offset) & 0xfff));
14585 /* Corner case, if lo is the max offset then we would be out of range
14586 once we have added the additional 1 below, so bump the msb into the
14587 pre-loading insn(s). */
14588 if (lo == 4095)
14589 lo &= 0x7ff;
14591 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14592 ^ (HOST_WIDE_INT) 0x80000000)
14593 - (HOST_WIDE_INT) 0x80000000);
14595 gcc_assert (hi + lo == offset);
14597 if (hi != 0)
14599 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14601 /* Be careful not to destroy OUTVAL. */
14602 if (reg_overlap_mentioned_p (base_plus, outval))
14604 /* Updating base_plus might destroy outval, see if we
14605 can swap the scratch and base_plus. */
14606 if (!reg_overlap_mentioned_p (scratch, outval))
14608 rtx tmp = scratch;
14609 scratch = base_plus;
14610 base_plus = tmp;
14612 else
14614 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14616 /* Be conservative and copy outval into scratch now,
14617 this should only be necessary if outval is a
14618 subreg of something larger than a word. */
14619 /* XXX Might this clobber base? I can't see how it
14620 can, since scratch is known to overlap with
14621 outval. */
14622 emit_insn (gen_movhi (scratch_hi, outval));
14623 outval = scratch_hi;
14627 /* Get the base address; addsi3 knows how to handle constants
14628 that require more than one insn. */
14629 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14630 base = base_plus;
14631 offset = lo;
14635 if (BYTES_BIG_ENDIAN)
14637 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14638 plus_constant (Pmode, base,
14639 offset + 1)),
14640 gen_lowpart (QImode, outval)));
14641 emit_insn (gen_lshrsi3 (scratch,
14642 gen_rtx_SUBREG (SImode, outval, 0),
14643 GEN_INT (8)));
14644 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14645 offset)),
14646 gen_lowpart (QImode, scratch)));
14648 else
14650 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14651 offset)),
14652 gen_lowpart (QImode, outval)));
14653 emit_insn (gen_lshrsi3 (scratch,
14654 gen_rtx_SUBREG (SImode, outval, 0),
14655 GEN_INT (8)));
14656 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14657 plus_constant (Pmode, base,
14658 offset + 1)),
14659 gen_lowpart (QImode, scratch)));
14663 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14664 (padded to the size of a word) should be passed in a register. */
14666 static bool
14667 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
14669 if (TARGET_AAPCS_BASED)
14670 return must_pass_in_stack_var_size (mode, type);
14671 else
14672 return must_pass_in_stack_var_size_or_pad (mode, type);
14676 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14677 Return true if an argument passed on the stack should be padded upwards,
14678 i.e. if the least-significant byte has useful data.
14679 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14680 aggregate types are placed in the lowest memory address. */
14682 bool
14683 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
14685 if (!TARGET_AAPCS_BASED)
14686 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
14688 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
14689 return false;
14691 return true;
14695 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14696 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14697 register has useful data, and return the opposite if the most
14698 significant byte does. */
14700 bool
14701 arm_pad_reg_upward (enum machine_mode mode,
14702 tree type, int first ATTRIBUTE_UNUSED)
14704 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
14706 /* For AAPCS, small aggregates, small fixed-point types,
14707 and small complex types are always padded upwards. */
14708 if (type)
14710 if ((AGGREGATE_TYPE_P (type)
14711 || TREE_CODE (type) == COMPLEX_TYPE
14712 || FIXED_POINT_TYPE_P (type))
14713 && int_size_in_bytes (type) <= 4)
14714 return true;
14716 else
14718 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
14719 && GET_MODE_SIZE (mode) <= 4)
14720 return true;
14724 /* Otherwise, use default padding. */
14725 return !BYTES_BIG_ENDIAN;
14728 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14729 assuming that the address in the base register is word aligned. */
14730 bool
14731 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
14733 HOST_WIDE_INT max_offset;
14735 /* Offset must be a multiple of 4 in Thumb mode. */
14736 if (TARGET_THUMB2 && ((offset & 3) != 0))
14737 return false;
14739 if (TARGET_THUMB2)
14740 max_offset = 1020;
14741 else if (TARGET_ARM)
14742 max_offset = 255;
14743 else
14744 return false;
14746 return ((offset <= max_offset) && (offset >= -max_offset));
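/* For instance, in Thumb-2 an offset of 1020 is accepted while 1024 (too
   large) and 1022 (not a multiple of 4) are rejected; in ARM state offsets
   up to +/-255 are accepted; in Thumb-1 the function always returns
   false.  */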
14749 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14750 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14751 Assumes that the address in the base register RN is word aligned. Pattern
14752 guarantees that both memory accesses use the same base register,
14753 the offsets are constants within the range, and the gap between the offsets is 4.
14754 If reload is complete then check that the registers are legal. WBACK indicates whether
14755 address is updated. LOAD indicates whether memory access is load or store. */
14756 bool
14757 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
14758 bool wback, bool load)
14760 unsigned int t, t2, n;
14762 if (!reload_completed)
14763 return true;
14765 if (!offset_ok_for_ldrd_strd (offset))
14766 return false;
14768 t = REGNO (rt);
14769 t2 = REGNO (rt2);
14770 n = REGNO (rn);
14772 if ((TARGET_THUMB2)
14773 && ((wback && (n == t || n == t2))
14774 || (t == SP_REGNUM)
14775 || (t == PC_REGNUM)
14776 || (t2 == SP_REGNUM)
14777 || (t2 == PC_REGNUM)
14778 || (!load && (n == PC_REGNUM))
14779 || (load && (t == t2))
14780 /* Triggers Cortex-M3 LDRD errata. */
14781 || (!wback && load && fix_cm3_ldrd && (n == t))))
14782 return false;
14784 if ((TARGET_ARM)
14785 && ((wback && (n == t || n == t2))
14786 || (t2 == PC_REGNUM)
14787 || (t % 2 != 0) /* First destination register is not even. */
14788 || (t2 != t + 1)
14789 /* PC can be used as base register (for offset addressing only),
14790 but it is deprecated. */
14791 || (n == PC_REGNUM)))
14792 return false;
14794 return true;
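/* These checks only bite after reload.  For example, in ARM state r4/r5 is
   an acceptable register pair (first register even, second == first + 1)
   while r5/r6 or r4/r6 are not; in Thumb-2 state SP and PC are rejected as
   transfer registers, as are loads where both destinations are the same or
   where the base register is written back into one of them.  */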
14797 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14798 operand ADDR is an immediate offset from the base register and is
14799 not volatile, in which case it sets BASE and OFFSET
14800 accordingly. */
14801 bool
14802 mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
14804 /* TODO: Handle more general memory operand patterns, such as
14805 PRE_DEC and PRE_INC. */
14807 /* Convert a subreg of mem into mem itself. */
14808 if (GET_CODE (addr) == SUBREG)
14809 addr = alter_subreg (&addr, true);
14811 gcc_assert (MEM_P (addr));
14813 /* Don't modify volatile memory accesses. */
14814 if (MEM_VOLATILE_P (addr))
14815 return false;
14817 *offset = const0_rtx;
14819 addr = XEXP (addr, 0);
14820 if (REG_P (addr))
14822 *base = addr;
14823 return true;
14825 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
14827 *base = XEXP (addr, 0);
14828 *offset = XEXP (addr, 1);
14829 return (REG_P (*base) && CONST_INT_P (*offset));
14832 return false;
14835 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
14837 /* Called from a peephole2 to replace two word-size accesses with a
14838 single LDRD/STRD instruction. Returns true iff we can generate a
14839 new instruction sequence. That is, both accesses use the same base
14840 register and the gap between constant offsets is 4. This function
14841 may reorder its operands to match ldrd/strd RTL templates.
14842 OPERANDS are the operands found by the peephole matcher;
14843 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14844 corresponding memory operands. LOAD indicates whether the access
14845 is load or store. CONST_STORE indicates a store of constant
14846 integer values held in OPERANDS[4,5] and assumes that the pattern
14847 is four insns long, for the purpose of checking dead registers.
14848 COMMUTE indicates that register operands may be reordered. */
14849 bool
14850 gen_operands_ldrd_strd (rtx *operands, bool load,
14851 bool const_store, bool commute)
14853 int nops = 2;
14854 HOST_WIDE_INT offsets[2], offset;
14855 rtx base = NULL_RTX;
14856 rtx cur_base, cur_offset, tmp;
14857 int i, gap;
14858 HARD_REG_SET regset;
14860 gcc_assert (!const_store || !load);
14861 /* Check that the memory references are immediate offsets from the
14862 same base register. Extract the base register, the destination
14863 registers, and the corresponding memory offsets. */
14864 for (i = 0; i < nops; i++)
14866 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
14867 return false;
14869 if (i == 0)
14870 base = cur_base;
14871 else if (REGNO (base) != REGNO (cur_base))
14872 return false;
14874 offsets[i] = INTVAL (cur_offset);
14875 if (GET_CODE (operands[i]) == SUBREG)
14877 tmp = SUBREG_REG (operands[i]);
14878 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
14879 operands[i] = tmp;
14883 /* Make sure there is no dependency between the individual loads. */
14884 if (load && REGNO (operands[0]) == REGNO (base))
14885 return false; /* RAW */
14887 if (load && REGNO (operands[0]) == REGNO (operands[1]))
14888 return false; /* WAW */
14890 /* If the same input register is used in both stores
14891 when storing different constants, try to find a free register.
14892 For example, the code
14893 mov r0, 0
14894 str r0, [r2]
14895 mov r0, 1
14896 str r0, [r2, #4]
14897 can be transformed into
14898 mov r1, 0
14899 strd r1, r0, [r2]
14900 in Thumb mode assuming that r1 is free. */
14901 if (const_store
14902 && REGNO (operands[0]) == REGNO (operands[1])
14903 && INTVAL (operands[4]) != INTVAL (operands[5]))
14905 if (TARGET_THUMB2)
14907 CLEAR_HARD_REG_SET (regset);
14908 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14909 if (tmp == NULL_RTX)
14910 return false;
14912 /* Use the new register in the first load to ensure that
14913 if the original input register is not dead after peephole,
14914 then it will have the correct constant value. */
14915 operands[0] = tmp;
14917 else if (TARGET_ARM)
14919 return false;
14920 int regno = REGNO (operands[0]);
14921 if (!peep2_reg_dead_p (4, operands[0]))
14923 /* When the input register is even and is not dead after the
14924 pattern, it has to hold the second constant but we cannot
14925 form a legal STRD in ARM mode with this register as the second
14926 register. */
14927 if (regno % 2 == 0)
14928 return false;
14930 /* Is regno-1 free? */
14931 SET_HARD_REG_SET (regset);
14932 CLEAR_HARD_REG_BIT(regset, regno - 1);
14933 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14934 if (tmp == NULL_RTX)
14935 return false;
14937 operands[0] = tmp;
14939 else
14941 /* Find a DImode register. */
14942 CLEAR_HARD_REG_SET (regset);
14943 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
14944 if (tmp != NULL_RTX)
14946 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
14947 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
14949 else
14951 /* Can we use the input register to form a DI register? */
14952 SET_HARD_REG_SET (regset);
14953 CLEAR_HARD_REG_BIT(regset,
14954 regno % 2 == 0 ? regno + 1 : regno - 1);
14955 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
14956 if (tmp == NULL_RTX)
14957 return false;
14958 operands[regno % 2 == 1 ? 0 : 1] = tmp;
14962 gcc_assert (operands[0] != NULL_RTX);
14963 gcc_assert (operands[1] != NULL_RTX);
14964 gcc_assert (REGNO (operands[0]) % 2 == 0);
14965 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
14969 /* Make sure the instructions are ordered with lower memory access first. */
14970 if (offsets[0] > offsets[1])
14972 gap = offsets[0] - offsets[1];
14973 offset = offsets[1];
14975 /* Swap the instructions such that lower memory is accessed first. */
14976 SWAP_RTX (operands[0], operands[1]);
14977 SWAP_RTX (operands[2], operands[3]);
14978 if (const_store)
14979 SWAP_RTX (operands[4], operands[5]);
14981 else
14983 gap = offsets[1] - offsets[0];
14984 offset = offsets[0];
14987 /* Make sure accesses are to consecutive memory locations. */
14988 if (gap != 4)
14989 return false;
14991 /* Make sure we generate legal instructions. */
14992 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
14993 false, load))
14994 return true;
14996 /* In Thumb state, where registers are almost unconstrained, there
14997 is little hope of fixing it. */
14998 if (TARGET_THUMB2)
14999 return false;
15001 if (load && commute)
15003 /* Try reordering registers. */
15004 SWAP_RTX (operands[0], operands[1]);
15005 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15006 false, load))
15007 return true;
15010 if (const_store)
15012 /* If input registers are dead after this pattern, they can be
15013 reordered or replaced by other registers that are free in the
15014 current pattern. */
15015 if (!peep2_reg_dead_p (4, operands[0])
15016 || !peep2_reg_dead_p (4, operands[1]))
15017 return false;
15019 /* Try to reorder the input registers. */
15020 /* For example, the code
15021 mov r0, 0
15022 mov r1, 1
15023 str r1, [r2]
15024 str r0, [r2, #4]
15025 can be transformed into
15026 mov r1, 0
15027 mov r0, 1
15028 strd r0, r1, [r2]
15030 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15031 false, false))
15033 SWAP_RTX (operands[0], operands[1]);
15034 return true;
15037 /* Try to find a free DI register. */
15038 CLEAR_HARD_REG_SET (regset);
15039 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15040 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15041 while (true)
15043 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15044 if (tmp == NULL_RTX)
15045 return false;
15047 /* DREG must be an even-numbered register in DImode.
15048 Split it into SI registers. */
15049 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15050 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15051 gcc_assert (operands[0] != NULL_RTX);
15052 gcc_assert (operands[1] != NULL_RTX);
15053 gcc_assert (REGNO (operands[0]) % 2 == 0);
15054 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15056 return (operands_ok_ldrd_strd (operands[0], operands[1],
15057 base, offset,
15058 false, load));
15062 return false;
15064 #undef SWAP_RTX
15069 /* Print a symbolic form of X to the debug file, F. */
15070 static void
15071 arm_print_value (FILE *f, rtx x)
15073 switch (GET_CODE (x))
15075 case CONST_INT:
15076 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15077 return;
15079 case CONST_DOUBLE:
15080 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15081 return;
15083 case CONST_VECTOR:
15085 int i;
15087 fprintf (f, "<");
15088 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15090 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15091 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15092 fputc (',', f);
15094 fprintf (f, ">");
15096 return;
15098 case CONST_STRING:
15099 fprintf (f, "\"%s\"", XSTR (x, 0));
15100 return;
15102 case SYMBOL_REF:
15103 fprintf (f, "`%s'", XSTR (x, 0));
15104 return;
15106 case LABEL_REF:
15107 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15108 return;
15110 case CONST:
15111 arm_print_value (f, XEXP (x, 0));
15112 return;
15114 case PLUS:
15115 arm_print_value (f, XEXP (x, 0));
15116 fprintf (f, "+");
15117 arm_print_value (f, XEXP (x, 1));
15118 return;
15120 case PC:
15121 fprintf (f, "pc");
15122 return;
15124 default:
15125 fprintf (f, "????");
15126 return;
15130 /* Routines for manipulation of the constant pool. */
15132 /* Arm instructions cannot load a large constant directly into a
15133 register; they have to come from a pc relative load. The constant
15134 must therefore be placed in the addressable range of the pc
15135 relative load. Depending on the precise pc relative load
15136 instruction the range is somewhere between 256 bytes and 4k. This
15137 means that we often have to dump a constant inside a function, and
15138 generate code to branch around it.
15140 It is important to minimize this, since the branches will slow
15141 things down and make the code larger.
15143 Normally we can hide the table after an existing unconditional
15144 branch so that there is no interruption of the flow, but in the
15145 worst case the code looks like this:
15147 ldr rn, L1
15149 b L2
15150 align
15151 L1: .long value
15155 ldr rn, L3
15157 b L4
15158 align
15159 L3: .long value
15163 We fix this by performing a scan after scheduling, which notices
15164 which instructions need to have their operands fetched from the
15165 constant table and builds the table.
15167 The algorithm starts by building a table of all the constants that
15168 need fixing up and all the natural barriers in the function (places
15169 where a constant table can be dropped without breaking the flow).
15170 For each fixup we note how far the pc-relative replacement will be
15171 able to reach and the offset of the instruction into the function.
15173 Having built the table we then group the fixes together to form
15174 tables that are as large as possible (subject to addressing
15175 constraints) and emit each table of constants after the last
15176 barrier that is within range of all the instructions in the group.
15177 If a group does not contain a barrier, then we forcibly create one
15178 by inserting a jump instruction into the flow. Once the table has
15179 been inserted, the insns are then modified to reference the
15180 relevant entry in the pool.
15182 Possible enhancements to the algorithm (not implemented) are:
15184 1) For some processors and object formats, there may be benefit in
15185 aligning the pools to the start of cache lines; this alignment
15186 would need to be taken into account when calculating addressability
15187 of a pool. */
15189 /* These typedefs are located at the start of this file, so that
15190 they can be used in the prototypes there. This comment is to
15191 remind readers of that fact so that the following structures
15192 can be understood more easily.
15194 typedef struct minipool_node Mnode;
15195 typedef struct minipool_fixup Mfix; */
15197 struct minipool_node
15199 /* Doubly linked chain of entries. */
15200 Mnode * next;
15201 Mnode * prev;
15202 /* The maximum offset into the code that this entry can be placed. While
15203 pushing fixes for forward references, all entries are sorted in order
15204 of increasing max_address. */
15205 HOST_WIDE_INT max_address;
15206 /* Similarly for an entry inserted for a backwards ref. */
15207 HOST_WIDE_INT min_address;
15208 /* The number of fixes referencing this entry. This can become zero
15209 if we "unpush" an entry. In this case we ignore the entry when we
15210 come to emit the code. */
15211 int refcount;
15212 /* The offset from the start of the minipool. */
15213 HOST_WIDE_INT offset;
15214 /* The value in table. */
15215 rtx value;
15216 /* The mode of value. */
15217 enum machine_mode mode;
15218 /* The size of the value. With iWMMXt enabled
15219 sizes > 4 also imply an alignment of 8 bytes. */
15220 int fix_size;
15223 struct minipool_fixup
15225 Mfix * next;
15226 rtx insn;
15227 HOST_WIDE_INT address;
15228 rtx * loc;
15229 enum machine_mode mode;
15230 int fix_size;
15231 rtx value;
15232 Mnode * minipool;
15233 HOST_WIDE_INT forwards;
15234 HOST_WIDE_INT backwards;
15237 /* Fixes less than a word need padding out to a word boundary. */
15238 #define MINIPOOL_FIX_SIZE(mode) \
15239 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
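/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both padded up to 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */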
15241 static Mnode * minipool_vector_head;
15242 static Mnode * minipool_vector_tail;
15243 static rtx minipool_vector_label;
15244 static int minipool_pad;
15246 /* The linked list of all minipool fixes required for this function. */
15247 Mfix * minipool_fix_head;
15248 Mfix * minipool_fix_tail;
15249 /* The fix entry for the current minipool, once it has been placed. */
15250 Mfix * minipool_barrier;
15252 /* Determines if INSN is the start of a jump table. Returns the end
15253 of the TABLE or NULL_RTX. */
15254 static rtx
15255 is_jump_table (rtx insn)
15257 rtx table;
15259 if (jump_to_label_p (insn)
15260 && ((table = next_active_insn (JUMP_LABEL (insn)))
15261 == next_active_insn (insn))
15262 && table != NULL
15263 && JUMP_TABLE_DATA_P (table))
15264 return table;
15266 return NULL_RTX;
15269 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15270 #define JUMP_TABLES_IN_TEXT_SECTION 0
15271 #endif
15273 static HOST_WIDE_INT
15274 get_jump_table_size (rtx insn)
15276 /* ADDR_VECs only take room if read-only data goes into the text
15277 section. */
15278 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15280 rtx body = PATTERN (insn);
15281 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15282 HOST_WIDE_INT size;
15283 HOST_WIDE_INT modesize;
15285 modesize = GET_MODE_SIZE (GET_MODE (body));
15286 size = modesize * XVECLEN (body, elt);
15287 switch (modesize)
15289 case 1:
15290 /* Round up size of TBB table to a halfword boundary. */
15291 size = (size + 1) & ~(HOST_WIDE_INT)1;
15292 break;
15293 case 2:
15294 /* No padding necessary for TBH. */
15295 break;
15296 case 4:
15297 /* Add two bytes for alignment on Thumb. */
15298 if (TARGET_THUMB)
15299 size += 2;
15300 break;
15301 default:
15302 gcc_unreachable ();
15304 return size;
15307 return 0;
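/* For example, a QImode ADDR_DIFF_VEC (a TBB table) with five entries takes
   5 bytes, rounded up to 6; an SImode vector in Thumb state gets two extra
   bytes for alignment; and if jump tables live in a separate read-only data
   section the size contribution here is zero.  */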
15310 /* Return the maximum amount of padding that will be inserted before
15311 label LABEL. */
15313 static HOST_WIDE_INT
15314 get_label_padding (rtx label)
15316 HOST_WIDE_INT align, min_insn_size;
15318 align = 1 << label_to_alignment (label);
15319 min_insn_size = TARGET_THUMB ? 2 : 4;
15320 return align > min_insn_size ? align - min_insn_size : 0;
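/* For example, a label aligned to 8 bytes in Thumb state (minimum insn size
   of 2) may be preceded by up to 6 bytes of padding, whereas 4-byte
   alignment in ARM state needs none.  */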
15323 /* Move a minipool fix MP from its current location to before MAX_MP.
15324 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15325 constraints may need updating. */
15326 static Mnode *
15327 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15328 HOST_WIDE_INT max_address)
15330 /* The code below assumes these are different. */
15331 gcc_assert (mp != max_mp);
15333 if (max_mp == NULL)
15335 if (max_address < mp->max_address)
15336 mp->max_address = max_address;
15338 else
15340 if (max_address > max_mp->max_address - mp->fix_size)
15341 mp->max_address = max_mp->max_address - mp->fix_size;
15342 else
15343 mp->max_address = max_address;
15345 /* Unlink MP from its current position. Since max_mp is non-null,
15346 mp->prev must be non-null. */
15347 mp->prev->next = mp->next;
15348 if (mp->next != NULL)
15349 mp->next->prev = mp->prev;
15350 else
15351 minipool_vector_tail = mp->prev;
15353 /* Re-insert it before MAX_MP. */
15354 mp->next = max_mp;
15355 mp->prev = max_mp->prev;
15356 max_mp->prev = mp;
15358 if (mp->prev != NULL)
15359 mp->prev->next = mp;
15360 else
15361 minipool_vector_head = mp;
15364 /* Save the new entry. */
15365 max_mp = mp;
15367 /* Scan over the preceding entries and adjust their addresses as
15368 required. */
15369 while (mp->prev != NULL
15370 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15372 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15373 mp = mp->prev;
15376 return max_mp;
15379 /* Add a constant to the minipool for a forward reference. Returns the
15380 node added or NULL if the constant will not fit in this pool. */
15381 static Mnode *
15382 add_minipool_forward_ref (Mfix *fix)
15384 /* If set, max_mp is the first pool_entry that has a lower
15385 constraint than the one we are trying to add. */
15386 Mnode * max_mp = NULL;
15387 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15388 Mnode * mp;
15390 /* If the minipool starts before the end of FIX->INSN then this FIX
15391 cannot be placed into the current pool. Furthermore, adding the
15392 new constant pool entry may cause the pool to start FIX_SIZE bytes
15393 earlier. */
15394 if (minipool_vector_head &&
15395 (fix->address + get_attr_length (fix->insn)
15396 >= minipool_vector_head->max_address - fix->fix_size))
15397 return NULL;
15399 /* Scan the pool to see if a constant with the same value has
15400 already been added. While we are doing this, also note the
15401 location where we must insert the constant if it doesn't already
15402 exist. */
15403 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15405 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15406 && fix->mode == mp->mode
15407 && (!LABEL_P (fix->value)
15408 || (CODE_LABEL_NUMBER (fix->value)
15409 == CODE_LABEL_NUMBER (mp->value)))
15410 && rtx_equal_p (fix->value, mp->value))
15412 /* More than one fix references this entry. */
15413 mp->refcount++;
15414 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15417 /* Note the insertion point if necessary. */
15418 if (max_mp == NULL
15419 && mp->max_address > max_address)
15420 max_mp = mp;
15422 /* If we are inserting an 8-byte aligned quantity and
15423 we have not already found an insertion point, then
15424 make sure that all such 8-byte aligned quantities are
15425 placed at the start of the pool. */
15426 if (ARM_DOUBLEWORD_ALIGN
15427 && max_mp == NULL
15428 && fix->fix_size >= 8
15429 && mp->fix_size < 8)
15431 max_mp = mp;
15432 max_address = mp->max_address;
15436 /* The value is not currently in the minipool, so we need to create
15437 a new entry for it. If MAX_MP is NULL, the entry will be put on
15438 the end of the list since the placement is less constrained than
15439 any existing entry. Otherwise, we insert the new fix before
15440 MAX_MP and, if necessary, adjust the constraints on the other
15441 entries. */
15442 mp = XNEW (Mnode);
15443 mp->fix_size = fix->fix_size;
15444 mp->mode = fix->mode;
15445 mp->value = fix->value;
15446 mp->refcount = 1;
15447 /* Not yet required for a backwards ref. */
15448 mp->min_address = -65536;
15450 if (max_mp == NULL)
15452 mp->max_address = max_address;
15453 mp->next = NULL;
15454 mp->prev = minipool_vector_tail;
15456 if (mp->prev == NULL)
15458 minipool_vector_head = mp;
15459 minipool_vector_label = gen_label_rtx ();
15461 else
15462 mp->prev->next = mp;
15464 minipool_vector_tail = mp;
15466 else
15468 if (max_address > max_mp->max_address - mp->fix_size)
15469 mp->max_address = max_mp->max_address - mp->fix_size;
15470 else
15471 mp->max_address = max_address;
15473 mp->next = max_mp;
15474 mp->prev = max_mp->prev;
15475 max_mp->prev = mp;
15476 if (mp->prev != NULL)
15477 mp->prev->next = mp;
15478 else
15479 minipool_vector_head = mp;
15482 /* Save the new entry. */
15483 max_mp = mp;
15485 /* Scan over the preceding entries and adjust their addresses as
15486 required. */
15487 while (mp->prev != NULL
15488 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15490 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15491 mp = mp->prev;
15494 return max_mp;
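/* MP is an entry already somewhere in the minipool whose minimum-address
   constraint must be tightened to MIN_ADDRESS.  If MIN_MP is non-null,
   MP is also moved to sit just after MIN_MP.  The offsets of all entries
   are then recomputed and the tightened constraints are propagated
   towards the end of the pool.  Returns MP as the new MIN_MP.  */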
15497 static Mnode *
15498 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15499 HOST_WIDE_INT min_address)
15501 HOST_WIDE_INT offset;
15503 /* The code below assumes these are different. */
15504 gcc_assert (mp != min_mp);
15506 if (min_mp == NULL)
15508 if (min_address > mp->min_address)
15509 mp->min_address = min_address;
15511 else
15513 /* We will adjust this below if it is too loose. */
15514 mp->min_address = min_address;
15516 /* Unlink MP from its current position. Since min_mp is non-null,
15517 mp->next must be non-null. */
15518 mp->next->prev = mp->prev;
15519 if (mp->prev != NULL)
15520 mp->prev->next = mp->next;
15521 else
15522 minipool_vector_head = mp->next;
15524 /* Reinsert it after MIN_MP. */
15525 mp->prev = min_mp;
15526 mp->next = min_mp->next;
15527 min_mp->next = mp;
15528 if (mp->next != NULL)
15529 mp->next->prev = mp;
15530 else
15531 minipool_vector_tail = mp;
15534 min_mp = mp;
15536 offset = 0;
15537 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15539 mp->offset = offset;
15540 if (mp->refcount > 0)
15541 offset += mp->fix_size;
15543 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15544 mp->next->min_address = mp->min_address + mp->fix_size;
15547 return min_mp;
15550 /* Add a constant to the minipool for a backward reference. Returns the
15551 node added or NULL if the constant will not fit in this pool.
15553 Note that the code for inserting a backwards reference can be
15554 somewhat confusing because the calculated offsets for each fix do
15555 not take into account the size of the pool (which is still under
15556 construction). */
15557 static Mnode *
15558 add_minipool_backward_ref (Mfix *fix)
15560 /* If set, min_mp is the last pool_entry that has a lower constraint
15561 than the one we are trying to add. */
15562 Mnode *min_mp = NULL;
15563 /* This can be negative, since it is only a constraint. */
15564 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15565 Mnode *mp;
15567 /* If we can't reach the current pool from this insn, or if we can't
15568 insert this entry at the end of the pool without pushing other
15569 fixes out of range, then we don't try. This ensures that we
15570 can't fail later on. */
15571 if (min_address >= minipool_barrier->address
15572 || (minipool_vector_tail->min_address + fix->fix_size
15573 >= minipool_barrier->address))
15574 return NULL;
15576 /* Scan the pool to see if a constant with the same value has
15577 already been added. While we are doing this, also note the
15578 location where we must insert the constant if it doesn't already
15579 exist. */
15580 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15582 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15583 && fix->mode == mp->mode
15584 && (!LABEL_P (fix->value)
15585 || (CODE_LABEL_NUMBER (fix->value)
15586 == CODE_LABEL_NUMBER (mp->value)))
15587 && rtx_equal_p (fix->value, mp->value)
15588 /* Check that there is enough slack to move this entry to the
15589 end of the table (this is conservative). */
15590 && (mp->max_address
15591 > (minipool_barrier->address
15592 + minipool_vector_tail->offset
15593 + minipool_vector_tail->fix_size)))
15595 mp->refcount++;
15596 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15599 if (min_mp != NULL)
15600 mp->min_address += fix->fix_size;
15601 else
15603 /* Note the insertion point if necessary. */
15604 if (mp->min_address < min_address)
15606 /* For now, we do not allow the insertion of nodes requiring
15607 8-byte alignment anywhere but at the start of the pool. */
15608 if (ARM_DOUBLEWORD_ALIGN
15609 && fix->fix_size >= 8 && mp->fix_size < 8)
15610 return NULL;
15611 else
15612 min_mp = mp;
15614 else if (mp->max_address
15615 < minipool_barrier->address + mp->offset + fix->fix_size)
15617 /* Inserting before this entry would push the fix beyond
15618 its maximum address (which can happen if we have
15619 re-located a forwards fix); force the new fix to come
15620 after it. */
15621 if (ARM_DOUBLEWORD_ALIGN
15622 && fix->fix_size >= 8 && mp->fix_size < 8)
15623 return NULL;
15624 else
15626 min_mp = mp;
15627 min_address = mp->min_address + fix->fix_size;
15630 /* Do not insert a non-8-byte aligned quantity before 8-byte
15631 aligned quantities. */
15632 else if (ARM_DOUBLEWORD_ALIGN
15633 && fix->fix_size < 8
15634 && mp->fix_size >= 8)
15636 min_mp = mp;
15637 min_address = mp->min_address + fix->fix_size;
15642 /* We need to create a new entry. */
15643 mp = XNEW (Mnode);
15644 mp->fix_size = fix->fix_size;
15645 mp->mode = fix->mode;
15646 mp->value = fix->value;
15647 mp->refcount = 1;
15648 mp->max_address = minipool_barrier->address + 65536;
15650 mp->min_address = min_address;
15652 if (min_mp == NULL)
15654 mp->prev = NULL;
15655 mp->next = minipool_vector_head;
15657 if (mp->next == NULL)
15659 minipool_vector_tail = mp;
15660 minipool_vector_label = gen_label_rtx ();
15662 else
15663 mp->next->prev = mp;
15665 minipool_vector_head = mp;
15667 else
15669 mp->next = min_mp->next;
15670 mp->prev = min_mp;
15671 min_mp->next = mp;
15673 if (mp->next != NULL)
15674 mp->next->prev = mp;
15675 else
15676 minipool_vector_tail = mp;
15679 /* Save the new entry. */
15680 min_mp = mp;
15682 if (mp->prev)
15683 mp = mp->prev;
15684 else
15685 mp->offset = 0;
15687 /* Scan over the following entries and adjust their offsets. */
15688 while (mp->next != NULL)
15690 if (mp->next->min_address < mp->min_address + mp->fix_size)
15691 mp->next->min_address = mp->min_address + mp->fix_size;
15693 if (mp->refcount)
15694 mp->next->offset = mp->offset + mp->fix_size;
15695 else
15696 mp->next->offset = mp->offset;
15698 mp = mp->next;
15701 return min_mp;
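/* Assign an offset within the minipool that will be emitted at BARRIER
   to every entry currently in the pool.  Entries whose refcount has
   dropped to zero take up no space.  */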
15704 static void
15705 assign_minipool_offsets (Mfix *barrier)
15707 HOST_WIDE_INT offset = 0;
15708 Mnode *mp;
15710 minipool_barrier = barrier;
15712 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15714 mp->offset = offset;
15716 if (mp->refcount > 0)
15717 offset += mp->fix_size;
15721 /* Output the literal table. */
15722 static void
15723 dump_minipool (rtx scan)
15725 Mnode * mp;
15726 Mnode * nmp;
15727 int align64 = 0;
15729 if (ARM_DOUBLEWORD_ALIGN)
15730 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15731 if (mp->refcount > 0 && mp->fix_size >= 8)
15733 align64 = 1;
15734 break;
15737 if (dump_file)
15738 fprintf (dump_file,
15739 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15740 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
15742 scan = emit_label_after (gen_label_rtx (), scan);
15743 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
15744 scan = emit_label_after (minipool_vector_label, scan);
15746 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
15748 if (mp->refcount > 0)
15750 if (dump_file)
15752 fprintf (dump_file,
15753 ";; Offset %u, min %ld, max %ld ",
15754 (unsigned) mp->offset, (unsigned long) mp->min_address,
15755 (unsigned long) mp->max_address);
15756 arm_print_value (dump_file, mp->value);
15757 fputc ('\n', dump_file);
15760 switch (mp->fix_size)
15762 #ifdef HAVE_consttable_1
15763 case 1:
15764 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
15765 break;
15767 #endif
15768 #ifdef HAVE_consttable_2
15769 case 2:
15770 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
15771 break;
15773 #endif
15774 #ifdef HAVE_consttable_4
15775 case 4:
15776 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
15777 break;
15779 #endif
15780 #ifdef HAVE_consttable_8
15781 case 8:
15782 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
15783 break;
15785 #endif
15786 #ifdef HAVE_consttable_16
15787 case 16:
15788 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
15789 break;
15791 #endif
15792 default:
15793 gcc_unreachable ();
15797 nmp = mp->next;
15798 free (mp);
15801 minipool_vector_head = minipool_vector_tail = NULL;
15802 scan = emit_insn_after (gen_consttable_end (), scan);
15803 scan = emit_barrier_after (scan);
15806 /* Return the cost of forcibly inserting a barrier after INSN. */
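/* Lower costs denote better places to put the pool; create_fix_barrier
   below keeps the cheapest insn it finds within range, preferring the
   later one when costs tie.  */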
15807 static int
15808 arm_barrier_cost (rtx insn)
15810 /* Basing the location of the pool on the loop depth is preferable,
15811 but at the moment, the basic block information seems to be
15812 corrupted by this stage of the compilation. */
15813 int base_cost = 50;
15814 rtx next = next_nonnote_insn (insn);
15816 if (next != NULL && LABEL_P (next))
15817 base_cost -= 20;
15819 switch (GET_CODE (insn))
15821 case CODE_LABEL:
15822 /* It will always be better to place the table before the label, rather
15823 than after it. */
15824 return 50;
15826 case INSN:
15827 case CALL_INSN:
15828 return base_cost;
15830 case JUMP_INSN:
15831 return base_cost - 10;
15833 default:
15834 return base_cost + 10;
15838 /* Find the best place in the insn stream in the range
15839 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15840 Create the barrier by inserting a jump and add a new fix entry for
15841 it. */
15842 static Mfix *
15843 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
15845 HOST_WIDE_INT count = 0;
15846 rtx barrier;
15847 rtx from = fix->insn;
15848 /* The instruction after which we will insert the jump. */
15849 rtx selected = NULL;
15850 int selected_cost;
15851 /* The address at which the jump instruction will be placed. */
15852 HOST_WIDE_INT selected_address;
15853 Mfix * new_fix;
15854 HOST_WIDE_INT max_count = max_address - fix->address;
15855 rtx label = gen_label_rtx ();
15857 selected_cost = arm_barrier_cost (from);
15858 selected_address = fix->address;
15860 while (from && count < max_count)
15862 rtx tmp;
15863 int new_cost;
15865 /* This code shouldn't have been called if there was a natural barrier
15866 within range. */
15867 gcc_assert (!BARRIER_P (from));
15869 /* Count the length of this insn. This must stay in sync with the
15870 code that pushes minipool fixes. */
15871 if (LABEL_P (from))
15872 count += get_label_padding (from);
15873 else
15874 count += get_attr_length (from);
15876 /* If there is a jump table, add its length. */
15877 tmp = is_jump_table (from);
15878 if (tmp != NULL)
15880 count += get_jump_table_size (tmp);
15882 /* Jump tables aren't in a basic block, so base the cost on
15883 the dispatch insn. If we select this location, we will
15884 still put the pool after the table. */
15885 new_cost = arm_barrier_cost (from);
15887 if (count < max_count
15888 && (!selected || new_cost <= selected_cost))
15890 selected = tmp;
15891 selected_cost = new_cost;
15892 selected_address = fix->address + count;
15895 /* Continue after the dispatch table. */
15896 from = NEXT_INSN (tmp);
15897 continue;
15900 new_cost = arm_barrier_cost (from);
15902 if (count < max_count
15903 && (!selected || new_cost <= selected_cost))
15905 selected = from;
15906 selected_cost = new_cost;
15907 selected_address = fix->address + count;
15910 from = NEXT_INSN (from);
15913 /* Make sure that we found a place to insert the jump. */
15914 gcc_assert (selected);
15916 /* Make sure we do not split a call and its corresponding
15917 CALL_ARG_LOCATION note. */
15918 if (CALL_P (selected))
15920 rtx next = NEXT_INSN (selected);
15921 if (next && NOTE_P (next)
15922 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
15923 selected = next;
15926 /* Create a new JUMP_INSN that branches around a barrier. */
15927 from = emit_jump_insn_after (gen_jump (label), selected);
15928 JUMP_LABEL (from) = label;
15929 barrier = emit_barrier_after (from);
15930 emit_label_after (label, barrier);
15932 /* Create a minipool barrier entry for the new barrier. */
15933 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
15934 new_fix->insn = barrier;
15935 new_fix->address = selected_address;
15936 new_fix->next = fix->next;
15937 fix->next = new_fix;
15939 return new_fix;
15942 /* Record that there is a natural barrier in the insn stream at
15943 ADDRESS. */
15944 static void
15945 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
15947 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15949 fix->insn = insn;
15950 fix->address = address;
15952 fix->next = NULL;
15953 if (minipool_fix_head != NULL)
15954 minipool_fix_tail->next = fix;
15955 else
15956 minipool_fix_head = fix;
15958 minipool_fix_tail = fix;
15961 /* Record INSN, which will need fixing up to load a value from the
15962 minipool. ADDRESS is the offset of the insn since the start of the
15963 function; LOC is a pointer to the part of the insn which requires
15964 fixing; VALUE is the constant that must be loaded, which is of type
15965 MODE. */
15966 static void
15967 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
15968 enum machine_mode mode, rtx value)
15970 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
15972 fix->insn = insn;
15973 fix->address = address;
15974 fix->loc = loc;
15975 fix->mode = mode;
15976 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
15977 fix->value = value;
15978 fix->forwards = get_attr_pool_range (insn);
15979 fix->backwards = get_attr_neg_pool_range (insn);
15980 fix->minipool = NULL;
15982 /* If an insn doesn't have a range defined for it, then it isn't
15983 expecting to be reworked by this code. Better to stop now than
15984 to generate duff assembly code. */
15985 gcc_assert (fix->forwards || fix->backwards);
15987 /* If an entry requires 8-byte alignment then assume all constant pools
15988 require 4 bytes of padding. Trying to do this later on a per-pool
15989 basis is awkward because existing pool entries have to be modified. */
15990 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
15991 minipool_pad = 4;
15993 if (dump_file)
15995 fprintf (dump_file,
15996 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15997 GET_MODE_NAME (mode),
15998 INSN_UID (insn), (unsigned long) address,
15999 -1 * (long)fix->backwards, (long)fix->forwards);
16000 arm_print_value (dump_file, fix->value);
16001 fprintf (dump_file, "\n");
16004 /* Add it to the chain of fixes. */
16005 fix->next = NULL;
16007 if (minipool_fix_head != NULL)
16008 minipool_fix_tail->next = fix;
16009 else
16010 minipool_fix_head = fix;
16012 minipool_fix_tail = fix;
16015 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16016 Returns the number of insns needed, or 99 if we don't know how to
16017 do it. */
16019 arm_const_double_inline_cost (rtx val)
16021 rtx lowpart, highpart;
16022 enum machine_mode mode;
16024 mode = GET_MODE (val);
16026 if (mode == VOIDmode)
16027 mode = DImode;
16029 gcc_assert (GET_MODE_SIZE (mode) == 8);
16031 lowpart = gen_lowpart (SImode, val);
16032 highpart = gen_highpart_mode (SImode, mode, val);
16034 gcc_assert (CONST_INT_P (lowpart));
16035 gcc_assert (CONST_INT_P (highpart));
16037 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16038 NULL_RTX, NULL_RTX, 0, 0)
16039 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16040 NULL_RTX, NULL_RTX, 0, 0));
16043 /* Return true if it is worthwhile to split a 64-bit constant into two
16044 32-bit operations. This is the case if optimizing for size, or
16045 if we have load delay slots, or if one 32-bit part can be done with
16046 a single data operation. */
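/* For instance, a 64-bit constant whose high word is zero only needs a
   single MOV for that half, so splitting it is worthwhile even if the
   other half is more expensive to build.  */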
16047 bool
16048 arm_const_double_by_parts (rtx val)
16050 enum machine_mode mode = GET_MODE (val);
16051 rtx part;
16053 if (optimize_size || arm_ld_sched)
16054 return true;
16056 if (mode == VOIDmode)
16057 mode = DImode;
16059 part = gen_highpart_mode (SImode, mode, val);
16061 gcc_assert (CONST_INT_P (part));
16063 if (const_ok_for_arm (INTVAL (part))
16064 || const_ok_for_arm (~INTVAL (part)))
16065 return true;
16067 part = gen_lowpart (SImode, val);
16069 gcc_assert (CONST_INT_P (part));
16071 if (const_ok_for_arm (INTVAL (part))
16072 || const_ok_for_arm (~INTVAL (part)))
16073 return true;
16075 return false;
16078 /* Return true if it is possible to inline both the high and low parts
16079 of a 64-bit constant into 32-bit data processing instructions. */
16080 bool
16081 arm_const_double_by_immediates (rtx val)
16083 enum machine_mode mode = GET_MODE (val);
16084 rtx part;
16086 if (mode == VOIDmode)
16087 mode = DImode;
16089 part = gen_highpart_mode (SImode, mode, val);
16091 gcc_assert (CONST_INT_P (part));
16093 if (!const_ok_for_arm (INTVAL (part)))
16094 return false;
16096 part = gen_lowpart (SImode, val);
16098 gcc_assert (CONST_INT_P (part));
16100 if (!const_ok_for_arm (INTVAL (part)))
16101 return false;
16103 return true;
16106 /* Scan INSN and note any of its operands that need fixing.
16107 If DO_PUSHES is false we do not actually push any of the fixups
16108 needed. */
16109 static void
16110 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16112 int opno;
16114 extract_insn (insn);
16116 if (!constrain_operands (1))
16117 fatal_insn_not_found (insn);
16119 if (recog_data.n_alternatives == 0)
16120 return;
16122 /* Fill in recog_op_alt with information about the constraints of
16123 this insn. */
16124 preprocess_constraints ();
16126 for (opno = 0; opno < recog_data.n_operands; opno++)
16128 /* Things we need to fix can only occur in inputs. */
16129 if (recog_data.operand_type[opno] != OP_IN)
16130 continue;
16132 /* If this alternative is a memory reference, then any mention
16133 of constants in this alternative is really to fool reload
16134 into allowing us to accept one there. We need to fix them up
16135 now so that we output the right code. */
16136 if (recog_op_alt[opno][which_alternative].memory_ok)
16138 rtx op = recog_data.operand[opno];
16140 if (CONSTANT_P (op))
16142 if (do_pushes)
16143 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16144 recog_data.operand_mode[opno], op);
16146 else if (MEM_P (op)
16147 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16148 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16150 if (do_pushes)
16152 rtx cop = avoid_constant_pool_reference (op);
16154 /* Casting the address of something to a mode narrower
16155 than a word can cause avoid_constant_pool_reference()
16156 to return the pool reference itself. That's no good to
16157 us here. Let's just hope that we can use the
16158 constant pool value directly. */
16159 if (op == cop)
16160 cop = get_pool_constant (XEXP (op, 0));
16162 push_minipool_fix (insn, address,
16163 recog_data.operand_loc[opno],
16164 recog_data.operand_mode[opno], cop);
16171 return;
16174 /* Rewrite move insn into subtract of 0 if the condition codes will
16175 be useful in next conditional jump insn. */
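/* A rough sketch of the rewrite (register numbers are illustrative):
   when a block ends in a cbranchsi4_insn that compares, say, r2
   against zero and the nearest preceding flag-setting insn is a
   low-register move "mov r1, r2", the move is rewritten as
   "subs r1, r2, #0" and the cbranch is switched to test r1, so the
   condition codes it needs are produced directly by that insn.  */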
16177 static void
16178 thumb1_reorg (void)
16180 basic_block bb;
16182 FOR_EACH_BB (bb)
16184 rtx dest, src;
16185 rtx pat, op0, set = NULL;
16186 rtx prev, insn = BB_END (bb);
16187 bool insn_clobbered = false;
16189 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16190 insn = PREV_INSN (insn);
16192 /* Find the last cbranchsi4_insn in basic block BB. */
16193 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16194 continue;
16196 /* Get the register with which we are comparing. */
16197 pat = PATTERN (insn);
16198 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16200 /* Find the first flag setting insn before INSN in basic block BB. */
16201 gcc_assert (insn != BB_HEAD (bb));
16202 for (prev = PREV_INSN (insn);
16203 (!insn_clobbered
16204 && prev != BB_HEAD (bb)
16205 && (NOTE_P (prev)
16206 || DEBUG_INSN_P (prev)
16207 || ((set = single_set (prev)) != NULL
16208 && get_attr_conds (prev) == CONDS_NOCOND)));
16209 prev = PREV_INSN (prev))
16211 if (reg_set_p (op0, prev))
16212 insn_clobbered = true;
16215 /* Skip if op0 is clobbered by insn other than prev. */
16216 if (insn_clobbered)
16217 continue;
16219 if (!set)
16220 continue;
16222 dest = SET_DEST (set);
16223 src = SET_SRC (set);
16224 if (!low_register_operand (dest, SImode)
16225 || !low_register_operand (src, SImode))
16226 continue;
16228 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16229 in INSN. Both src and dest of the move insn are checked. */
16230 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16232 dest = copy_rtx (dest);
16233 src = copy_rtx (src);
16234 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16235 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16236 INSN_CODE (prev) = -1;
16237 /* Set test register in INSN to dest. */
16238 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16239 INSN_CODE (insn) = -1;
16244 /* Convert instructions to their cc-clobbering variant if possible, since
16245 that allows us to use smaller encodings. */
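/* For example, when the condition-code register is dead, a 32-bit
   "add r0, r0, r1" can be replaced by the flag-setting
   "adds r0, r0, r1", which has a 16-bit encoding.  The loop below
   simulates register liveness backwards through each block to find
   the points where CC_REGNUM is dead.  */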
16247 static void
16248 thumb2_reorg (void)
16250 basic_block bb;
16251 regset_head live;
16253 INIT_REG_SET (&live);
16255 /* We are freeing block_for_insn in the toplev to keep compatibility
16256 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16257 compute_bb_for_insn ();
16258 df_analyze ();
16260 FOR_EACH_BB (bb)
16262 rtx insn;
16264 COPY_REG_SET (&live, DF_LR_OUT (bb));
16265 df_simulate_initialize_backwards (bb, &live);
16266 FOR_BB_INSNS_REVERSE (bb, insn)
16268 if (NONJUMP_INSN_P (insn)
16269 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16270 && GET_CODE (PATTERN (insn)) == SET)
16272 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16273 rtx pat = PATTERN (insn);
16274 rtx dst = XEXP (pat, 0);
16275 rtx src = XEXP (pat, 1);
16276 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16278 if (!OBJECT_P (src))
16279 op0 = XEXP (src, 0);
16281 if (BINARY_P (src))
16282 op1 = XEXP (src, 1);
16284 if (low_register_operand (dst, SImode))
16286 switch (GET_CODE (src))
16288 case PLUS:
16289 /* Adding two registers and storing the result
16290 in the first source is already a 16-bit
16291 operation. */
16292 if (rtx_equal_p (dst, op0)
16293 && register_operand (op1, SImode))
16294 break;
16296 if (low_register_operand (op0, SImode))
16298 /* ADDS <Rd>,<Rn>,<Rm> */
16299 if (low_register_operand (op1, SImode))
16300 action = CONV;
16301 /* ADDS <Rdn>,#<imm8> */
16302 /* SUBS <Rdn>,#<imm8> */
16303 else if (rtx_equal_p (dst, op0)
16304 && CONST_INT_P (op1)
16305 && IN_RANGE (INTVAL (op1), -255, 255))
16306 action = CONV;
16307 /* ADDS <Rd>,<Rn>,#<imm3> */
16308 /* SUBS <Rd>,<Rn>,#<imm3> */
16309 else if (CONST_INT_P (op1)
16310 && IN_RANGE (INTVAL (op1), -7, 7))
16311 action = CONV;
16313 /* ADCS <Rd>, <Rn> */
16314 else if (GET_CODE (XEXP (src, 0)) == PLUS
16315 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16316 && low_register_operand (XEXP (XEXP (src, 0), 1),
16317 SImode)
16318 && COMPARISON_P (op1)
16319 && cc_register (XEXP (op1, 0), VOIDmode)
16320 && maybe_get_arm_condition_code (op1) == ARM_CS
16321 && XEXP (op1, 1) == const0_rtx)
16322 action = CONV;
16323 break;
16325 case MINUS:
16326 /* RSBS <Rd>,<Rn>,#0
16327 Not handled here: see NEG below. */
16328 /* SUBS <Rd>,<Rn>,#<imm3>
16329 SUBS <Rdn>,#<imm8>
16330 Not handled here: see PLUS above. */
16331 /* SUBS <Rd>,<Rn>,<Rm> */
16332 if (low_register_operand (op0, SImode)
16333 && low_register_operand (op1, SImode))
16334 action = CONV;
16335 break;
16337 case MULT:
16338 /* MULS <Rdm>,<Rn>,<Rdm>
16339 As an exception to the rule, this is only used
16340 when optimizing for size since MULS is slow on all
16341 known implementations. We do not even want to use
16342 MULS in cold code, if optimizing for speed, so we
16343 test the global flag here. */
16344 if (!optimize_size)
16345 break;
16346 /* else fall through. */
16347 case AND:
16348 case IOR:
16349 case XOR:
16350 /* ANDS <Rdn>,<Rm> */
16351 if (rtx_equal_p (dst, op0)
16352 && low_register_operand (op1, SImode))
16353 action = CONV;
16354 else if (rtx_equal_p (dst, op1)
16355 && low_register_operand (op0, SImode))
16356 action = SWAP_CONV;
16357 break;
16359 case ASHIFTRT:
16360 case ASHIFT:
16361 case LSHIFTRT:
16362 /* ASRS <Rdn>,<Rm> */
16363 /* LSRS <Rdn>,<Rm> */
16364 /* LSLS <Rdn>,<Rm> */
16365 if (rtx_equal_p (dst, op0)
16366 && low_register_operand (op1, SImode))
16367 action = CONV;
16368 /* ASRS <Rd>,<Rm>,#<imm5> */
16369 /* LSRS <Rd>,<Rm>,#<imm5> */
16370 /* LSLS <Rd>,<Rm>,#<imm5> */
16371 else if (low_register_operand (op0, SImode)
16372 && CONST_INT_P (op1)
16373 && IN_RANGE (INTVAL (op1), 0, 31))
16374 action = CONV;
16375 break;
16377 case ROTATERT:
16378 /* RORS <Rdn>,<Rm> */
16379 if (rtx_equal_p (dst, op0)
16380 && low_register_operand (op1, SImode))
16381 action = CONV;
16382 break;
16384 case NOT:
16385 case NEG:
16386 /* MVNS <Rd>,<Rm> */
16387 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16388 if (low_register_operand (op0, SImode))
16389 action = CONV;
16390 break;
16392 case CONST_INT:
16393 /* MOVS <Rd>,#<imm8> */
16394 if (CONST_INT_P (src)
16395 && IN_RANGE (INTVAL (src), 0, 255))
16396 action = CONV;
16397 break;
16399 case REG:
16400 /* MOVS and MOV<c> with registers have different
16401 encodings, so are not relevant here. */
16402 break;
16404 default:
16405 break;
16409 if (action != SKIP)
16411 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16412 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16413 rtvec vec;
16415 if (action == SWAP_CONV)
16417 src = copy_rtx (src);
16418 XEXP (src, 0) = op1;
16419 XEXP (src, 1) = op0;
16420 pat = gen_rtx_SET (VOIDmode, dst, src);
16421 vec = gen_rtvec (2, pat, clobber);
16423 else /* action == CONV */
16424 vec = gen_rtvec (2, pat, clobber);
16426 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16427 INSN_CODE (insn) = -1;
16431 if (NONDEBUG_INSN_P (insn))
16432 df_simulate_one_insn_backwards (bb, insn, &live);
16436 CLEAR_REG_SET (&live);
16439 /* GCC puts the pool in the wrong place for ARM, since we can only
16440 load addresses a limited distance around the pc. We do some
16441 special munging to move the constant pool values to the correct
16442 point in the code. */
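/* In outline: scan the whole function, recording every operand that
   will need a minipool entry (push_minipool_fix) and every existing
   barrier (push_minipool_barrier); group the fixes into pools, placing
   each pool at a natural barrier when one is within range or behind a
   branch created by create_fix_barrier otherwise; then rewrite each
   fixed-up operand as a reference to its entry and emit the pool with
   dump_minipool.  */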
16443 static void
16444 arm_reorg (void)
16446 rtx insn;
16447 HOST_WIDE_INT address = 0;
16448 Mfix * fix;
16450 if (TARGET_THUMB1)
16451 thumb1_reorg ();
16452 else if (TARGET_THUMB2)
16453 thumb2_reorg ();
16455 /* Ensure all insns that must be split have been split at this point.
16456 Otherwise, the pool placement code below may compute incorrect
16457 insn lengths. Note that when optimizing, all insns have already
16458 been split at this point. */
16459 if (!optimize)
16460 split_all_insns_noflow ();
16462 minipool_fix_head = minipool_fix_tail = NULL;
16464 /* The first insn must always be a note, or the code below won't
16465 scan it properly. */
16466 insn = get_insns ();
16467 gcc_assert (NOTE_P (insn));
16468 minipool_pad = 0;
16470 /* Scan all the insns and record the operands that will need fixing. */
16471 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16473 if (BARRIER_P (insn))
16474 push_minipool_barrier (insn, address);
16475 else if (INSN_P (insn))
16477 rtx table;
16479 note_invalid_constants (insn, address, true);
16480 address += get_attr_length (insn);
16482 /* If the insn is a vector jump, add the size of the table
16483 and skip the table. */
16484 if ((table = is_jump_table (insn)) != NULL)
16486 address += get_jump_table_size (table);
16487 insn = table;
16490 else if (LABEL_P (insn))
16491 /* Add the worst-case padding due to alignment. We don't add
16492 the _current_ padding because the minipool insertions
16493 themselves might change it. */
16494 address += get_label_padding (insn);
16497 fix = minipool_fix_head;
16499 /* Now scan the fixups and perform the required changes. */
16500 while (fix)
16502 Mfix * ftmp;
16503 Mfix * fdel;
16504 Mfix * last_added_fix;
16505 Mfix * last_barrier = NULL;
16506 Mfix * this_fix;
16508 /* Skip any further barriers before the next fix. */
16509 while (fix && BARRIER_P (fix->insn))
16510 fix = fix->next;
16512 /* No more fixes. */
16513 if (fix == NULL)
16514 break;
16516 last_added_fix = NULL;
16518 for (ftmp = fix; ftmp; ftmp = ftmp->next)
16520 if (BARRIER_P (ftmp->insn))
16522 if (ftmp->address >= minipool_vector_head->max_address)
16523 break;
16525 last_barrier = ftmp;
16527 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
16528 break;
16530 last_added_fix = ftmp; /* Keep track of the last fix added. */
16533 /* If we found a barrier, drop back to that; any fixes that we
16534 could have reached but come after the barrier will now go in
16535 the next mini-pool. */
16536 if (last_barrier != NULL)
16538 /* Reduce the refcount for those fixes that won't go into this
16539 pool after all. */
16540 for (fdel = last_barrier->next;
16541 fdel && fdel != ftmp;
16542 fdel = fdel->next)
16544 fdel->minipool->refcount--;
16545 fdel->minipool = NULL;
16548 ftmp = last_barrier;
16550 else
16552 /* ftmp is the first fix that we can't fit into this pool and
16553 there are no natural barriers that we could use. Insert a
16554 new barrier in the code somewhere between the previous
16555 fix and this one, and arrange to jump around it. */
16556 HOST_WIDE_INT max_address;
16558 /* The last item on the list of fixes must be a barrier, so
16559 we can never run off the end of the list of fixes without
16560 last_barrier being set. */
16561 gcc_assert (ftmp);
16563 max_address = minipool_vector_head->max_address;
16564 /* Check that there isn't another fix that is in range that
16565 we couldn't fit into this pool because the pool was
16566 already too large: we need to put the pool before such an
16567 instruction. The pool itself may come just after the
16568 fix because create_fix_barrier also allows space for a
16569 jump instruction. */
16570 if (ftmp->address < max_address)
16571 max_address = ftmp->address + 1;
16573 last_barrier = create_fix_barrier (last_added_fix, max_address);
16576 assign_minipool_offsets (last_barrier);
16578 while (ftmp)
16580 if (!BARRIER_P (ftmp->insn)
16581 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
16582 == NULL))
16583 break;
16585 ftmp = ftmp->next;
16588 /* Scan over the fixes we have identified for this pool, fixing them
16589 up and adding the constants to the pool itself. */
16590 for (this_fix = fix; this_fix && ftmp != this_fix;
16591 this_fix = this_fix->next)
16592 if (!BARRIER_P (this_fix->insn))
16594 rtx addr
16595 = plus_constant (Pmode,
16596 gen_rtx_LABEL_REF (VOIDmode,
16597 minipool_vector_label),
16598 this_fix->minipool->offset);
16599 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
16602 dump_minipool (last_barrier->insn);
16603 fix = ftmp;
16606 /* From now on we must synthesize any constants that we can't handle
16607 directly. This can happen if the RTL gets split during final
16608 instruction generation. */
16609 after_arm_reorg = 1;
16611 /* Free the minipool memory. */
16612 obstack_free (&minipool_obstack, minipool_startobj);
16615 /* Routines to output assembly language. */
16617 /* If the rtx is the correct value then return the string of the number.
16618 In this way we can ensure that valid double constants are generated even
16619 when cross compiling. */
16620 const char *
16621 fp_immediate_constant (rtx x)
16623 REAL_VALUE_TYPE r;
16625 if (!fp_consts_inited)
16626 init_fp_table ();
16628 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16630 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
16631 return "0";
16634 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
16635 static const char *
16636 fp_const_from_val (REAL_VALUE_TYPE *r)
16638 if (!fp_consts_inited)
16639 init_fp_table ();
16641 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
16642 return "0";
16645 /* OPERANDS[0] is the entire list of insns that constitute pop,
16646 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16647 is in the list, UPDATE is true iff the list contains explicit
16648 update of base register. */
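/* For example, with SP as the base register and unified syntax this
   emits a "pop"; otherwise it emits an "ldmfd"/"ldmia" form, adding
   "!" when the base register is explicitly updated and a trailing "^"
   when returning from an interrupt handler.  */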
16649 void
16650 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
16651 bool update)
16653 int i;
16654 char pattern[100];
16655 int offset;
16656 const char *conditional;
16657 int num_saves = XVECLEN (operands[0], 0);
16658 unsigned int regno;
16659 unsigned int regno_base = REGNO (operands[1]);
16661 offset = 0;
16662 offset += update ? 1 : 0;
16663 offset += return_pc ? 1 : 0;
16665 /* Is the base register in the list? */
16666 for (i = offset; i < num_saves; i++)
16668 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
16669 /* If SP is in the list, then the base register must be SP. */
16670 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
16671 /* If base register is in the list, there must be no explicit update. */
16672 if (regno == regno_base)
16673 gcc_assert (!update);
16676 conditional = reverse ? "%?%D0" : "%?%d0";
16677 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
16679 /* Output pop (not stmfd) because it has a shorter encoding. */
16680 gcc_assert (update);
16681 sprintf (pattern, "pop%s\t{", conditional);
16683 else
16685 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16686 It's just a convention; their semantics are identical. */
16687 if (regno_base == SP_REGNUM)
16688 sprintf (pattern, "ldm%sfd\t", conditional);
16689 else if (TARGET_UNIFIED_ASM)
16690 sprintf (pattern, "ldmia%s\t", conditional);
16691 else
16692 sprintf (pattern, "ldm%sia\t", conditional);
16694 strcat (pattern, reg_names[regno_base]);
16695 if (update)
16696 strcat (pattern, "!, {");
16697 else
16698 strcat (pattern, ", {");
16701 /* Output the first destination register. */
16702 strcat (pattern,
16703 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
16705 /* Output the rest of the destination registers. */
16706 for (i = offset + 1; i < num_saves; i++)
16708 strcat (pattern, ", ");
16709 strcat (pattern,
16710 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
16713 strcat (pattern, "}");
16715 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
16716 strcat (pattern, "^");
16718 output_asm_insn (pattern, &cond);
16722 /* Output the assembly for a store multiple. */
16724 const char *
16725 vfp_output_fstmd (rtx * operands)
16727 char pattern[100];
16728 int p;
16729 int base;
16730 int i;
16732 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
16733 p = strlen (pattern);
16735 gcc_assert (REG_P (operands[1]));
16737 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
16738 for (i = 1; i < XVECLEN (operands[2], 0); i++)
16740 p += sprintf (&pattern[p], ", d%d", base + i);
16742 strcpy (&pattern[p], "}");
16744 output_asm_insn (pattern, operands);
16745 return "";
16749 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16750 number of bytes pushed. */
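/* Each D register pushed takes 8 bytes of stack, so the return value is
   normally COUNT * 8; the ARM10 VFPr1 workaround below may add one
   extra register to that.  */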
16752 static int
16753 vfp_emit_fstmd (int base_reg, int count)
16755 rtx par;
16756 rtx dwarf;
16757 rtx tmp, reg;
16758 int i;
16760 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
16761 register pairs are stored by a store multiple insn. We avoid this
16762 by pushing an extra pair. */
16763 if (count == 2 && !arm_arch6)
16765 if (base_reg == LAST_VFP_REGNUM - 3)
16766 base_reg -= 2;
16767 count++;
16770 /* FSTMD may not store more than 16 doubleword registers at once. Split
16771 larger stores into multiple parts (up to a maximum of two, in
16772 practice). */
16773 if (count > 16)
16775 int saved;
16776 /* NOTE: base_reg is an internal register number, so each D register
16777 counts as 2. */
16778 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
16779 saved += vfp_emit_fstmd (base_reg, 16);
16780 return saved;
16783 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16784 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16786 reg = gen_rtx_REG (DFmode, base_reg);
16787 base_reg += 2;
16789 XVECEXP (par, 0, 0)
16790 = gen_rtx_SET (VOIDmode,
16791 gen_frame_mem
16792 (BLKmode,
16793 gen_rtx_PRE_MODIFY (Pmode,
16794 stack_pointer_rtx,
16795 plus_constant
16796 (Pmode, stack_pointer_rtx,
16797 - (count * 8)))
16799 gen_rtx_UNSPEC (BLKmode,
16800 gen_rtvec (1, reg),
16801 UNSPEC_PUSH_MULT));
16803 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16804 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
16805 RTX_FRAME_RELATED_P (tmp) = 1;
16806 XVECEXP (dwarf, 0, 0) = tmp;
16808 tmp = gen_rtx_SET (VOIDmode,
16809 gen_frame_mem (DFmode, stack_pointer_rtx),
16810 reg);
16811 RTX_FRAME_RELATED_P (tmp) = 1;
16812 XVECEXP (dwarf, 0, 1) = tmp;
16814 for (i = 1; i < count; i++)
16816 reg = gen_rtx_REG (DFmode, base_reg);
16817 base_reg += 2;
16818 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16820 tmp = gen_rtx_SET (VOIDmode,
16821 gen_frame_mem (DFmode,
16822 plus_constant (Pmode,
16823 stack_pointer_rtx,
16824 i * 8)),
16825 reg);
16826 RTX_FRAME_RELATED_P (tmp) = 1;
16827 XVECEXP (dwarf, 0, i + 1) = tmp;
16830 par = emit_insn (par);
16831 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16832 RTX_FRAME_RELATED_P (par) = 1;
16834 return count * 8;
16837 /* Emit a call instruction with pattern PAT. ADDR is the address of
16838 the call target. */
16840 void
16841 arm_emit_call_insn (rtx pat, rtx addr)
16843 rtx insn;
16845 insn = emit_call_insn (pat);
16847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16848 If the call might use such an entry, add a use of the PIC register
16849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16850 if (TARGET_VXWORKS_RTP
16851 && flag_pic
16852 && GET_CODE (addr) == SYMBOL_REF
16853 && (SYMBOL_REF_DECL (addr)
16854 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
16855 : !SYMBOL_REF_LOCAL_P (addr)))
16857 require_pic_register ();
16858 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
16862 /* Output a 'call' insn. */
16863 const char *
16864 output_call (rtx *operands)
16866 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
16868 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16869 if (REGNO (operands[0]) == LR_REGNUM)
16871 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
16872 output_asm_insn ("mov%?\t%0, %|lr", operands);
16875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16877 if (TARGET_INTERWORK || arm_arch4t)
16878 output_asm_insn ("bx%?\t%0", operands);
16879 else
16880 output_asm_insn ("mov%?\t%|pc, %0", operands);
16882 return "";
16885 /* Output a 'call' insn that is a reference in memory. This is
16886 disabled for ARMv5 and we prefer a blx instead because otherwise
16887 there's a significant performance overhead. */
16888 const char *
16889 output_call_mem (rtx *operands)
16891 gcc_assert (!arm_arch5);
16892 if (TARGET_INTERWORK)
16894 output_asm_insn ("ldr%?\t%|ip, %0", operands);
16895 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16896 output_asm_insn ("bx%?\t%|ip", operands);
16898 else if (regno_use_in (LR_REGNUM, operands[0]))
16900 /* LR is used in the memory address. We load the address in the
16901 first instruction. It's safe to use IP as the target of the
16902 load since the call will kill it anyway. */
16903 output_asm_insn ("ldr%?\t%|ip, %0", operands);
16904 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16905 if (arm_arch4t)
16906 output_asm_insn ("bx%?\t%|ip", operands);
16907 else
16908 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
16910 else
16912 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
16913 output_asm_insn ("ldr%?\t%|pc, %0", operands);
16916 return "";
16920 /* Output a move from arm registers to arm registers of a long double
16921 OPERANDS[0] is the destination.
16922 OPERANDS[1] is the source. */
16923 const char *
16924 output_mov_long_double_arm_from_arm (rtx *operands)
16926 /* We have to be careful here because the two might overlap. */
16927 int dest_start = REGNO (operands[0]);
16928 int src_start = REGNO (operands[1]);
16929 rtx ops[2];
16930 int i;
16932 if (dest_start < src_start)
16934 for (i = 0; i < 3; i++)
16936 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16937 ops[1] = gen_rtx_REG (SImode, src_start + i);
16938 output_asm_insn ("mov%?\t%0, %1", ops);
16941 else
16943 for (i = 2; i >= 0; i--)
16945 ops[0] = gen_rtx_REG (SImode, dest_start + i);
16946 ops[1] = gen_rtx_REG (SImode, src_start + i);
16947 output_asm_insn ("mov%?\t%0, %1", ops);
16951 return "";
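/* Load the 32-bit value SRC into DEST as a pair of halfword sets
   (typically assembled as a movw/movt pair): an immediate is split
   into its low and high 16 bits, the high half being skipped when it
   is zero, while a symbolic SRC is emitted as HIGH followed by
   LO_SUM.  */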
16954 void
16955 arm_emit_movpair (rtx dest, rtx src)
16957 /* If the src is an immediate, simplify it. */
16958 if (CONST_INT_P (src))
16960 HOST_WIDE_INT val = INTVAL (src);
16961 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
16962 if ((val >> 16) & 0x0000ffff)
16963 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
16964 GEN_INT (16)),
16965 GEN_INT ((val >> 16) & 0x0000ffff));
16966 return;
16968 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
16969 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
16972 /* Output a move between double words. It must be REG<-MEM
16973 or MEM<-REG. */
16974 const char *
16975 output_move_double (rtx *operands, bool emit, int *count)
16977 enum rtx_code code0 = GET_CODE (operands[0]);
16978 enum rtx_code code1 = GET_CODE (operands[1]);
16979 rtx otherops[3];
16980 if (count)
16981 *count = 1;
16983 /* The only case when this might happen is when
16984 you are looking at the length of a DImode instruction
16985 that has an invalid constant in it. */
16986 if (code0 == REG && code1 != MEM)
16988 gcc_assert (!emit);
16989 *count = 2;
16990 return "";
16993 if (code0 == REG)
16995 unsigned int reg0 = REGNO (operands[0]);
16997 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
16999 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17001 switch (GET_CODE (XEXP (operands[1], 0)))
17003 case REG:
17005 if (emit)
17007 if (TARGET_LDRD
17008 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17009 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17010 else
17011 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17013 break;
17015 case PRE_INC:
17016 gcc_assert (TARGET_LDRD);
17017 if (emit)
17018 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17019 break;
17021 case PRE_DEC:
17022 if (emit)
17024 if (TARGET_LDRD)
17025 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17026 else
17027 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17029 break;
17031 case POST_INC:
17032 if (emit)
17034 if (TARGET_LDRD)
17035 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17036 else
17037 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17039 break;
17041 case POST_DEC:
17042 gcc_assert (TARGET_LDRD);
17043 if (emit)
17044 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17045 break;
17047 case PRE_MODIFY:
17048 case POST_MODIFY:
17049 /* Autoincrement addressing modes should never have overlapping
17050 base and destination registers, and overlapping index registers
17051 are already prohibited, so this doesn't need to worry about
17052 fix_cm3_ldrd. */
17053 otherops[0] = operands[0];
17054 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17055 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17057 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17059 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17061 /* Registers overlap so split out the increment. */
17062 if (emit)
17064 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17065 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17067 if (count)
17068 *count = 2;
17070 else
17072 /* Use a single insn if we can.
17073 FIXME: IWMMXT allows offsets larger than ldrd can
17074 handle, fix these up with a pair of ldr. */
17075 if (TARGET_THUMB2
17076 || !CONST_INT_P (otherops[2])
17077 || (INTVAL (otherops[2]) > -256
17078 && INTVAL (otherops[2]) < 256))
17080 if (emit)
17081 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17083 else
17085 if (emit)
17087 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17088 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17090 if (count)
17091 *count = 2;
17096 else
17098 /* Use a single insn if we can.
17099 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17100 fix these up with a pair of ldr. */
17101 if (TARGET_THUMB2
17102 || !CONST_INT_P (otherops[2])
17103 || (INTVAL (otherops[2]) > -256
17104 && INTVAL (otherops[2]) < 256))
17106 if (emit)
17107 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17109 else
17111 if (emit)
17113 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17114 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17116 if (count)
17117 *count = 2;
17120 break;
17122 case LABEL_REF:
17123 case CONST:
17124 /* We might be able to use ldrd %0, %1 here. However, the range is
17125 different to ldr/adr, and it is broken on some ARMv7-M
17126 implementations. */
17127 /* Use the second register of the pair to avoid problematic
17128 overlap. */
17129 otherops[1] = operands[1];
17130 if (emit)
17131 output_asm_insn ("adr%?\t%0, %1", otherops);
17132 operands[1] = otherops[0];
17133 if (emit)
17135 if (TARGET_LDRD)
17136 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17137 else
17138 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17141 if (count)
17142 *count = 2;
17143 break;
17145 /* ??? This needs checking for thumb2. */
17146 default:
17147 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17148 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17150 otherops[0] = operands[0];
17151 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17152 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17154 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17156 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17158 switch ((int) INTVAL (otherops[2]))
17160 case -8:
17161 if (emit)
17162 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17163 return "";
17164 case -4:
17165 if (TARGET_THUMB2)
17166 break;
17167 if (emit)
17168 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17169 return "";
17170 case 4:
17171 if (TARGET_THUMB2)
17172 break;
17173 if (emit)
17174 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17175 return "";
17178 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17179 operands[1] = otherops[0];
17180 if (TARGET_LDRD
17181 && (REG_P (otherops[2])
17182 || TARGET_THUMB2
17183 || (CONST_INT_P (otherops[2])
17184 && INTVAL (otherops[2]) > -256
17185 && INTVAL (otherops[2]) < 256)))
17187 if (reg_overlap_mentioned_p (operands[0],
17188 otherops[2]))
17190 rtx tmp;
17191 /* Swap base and index registers over to
17192 avoid a conflict. */
17193 tmp = otherops[1];
17194 otherops[1] = otherops[2];
17195 otherops[2] = tmp;
17197 /* If both registers conflict, it will usually
17198 have been fixed by a splitter. */
17199 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17200 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17202 if (emit)
17204 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17205 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17207 if (count)
17208 *count = 2;
17210 else
17212 otherops[0] = operands[0];
17213 if (emit)
17214 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17216 return "";
17219 if (CONST_INT_P (otherops[2]))
17221 if (emit)
17223 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17224 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17225 else
17226 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17229 else
17231 if (emit)
17232 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17235 else
17237 if (emit)
17238 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17241 if (count)
17242 *count = 2;
17244 if (TARGET_LDRD)
17245 return "ldr%(d%)\t%0, [%1]";
17247 return "ldm%(ia%)\t%1, %M0";
17249 else
17251 otherops[1] = adjust_address (operands[1], SImode, 4);
17252 /* Take care of overlapping base/data reg. */
17253 if (reg_mentioned_p (operands[0], operands[1]))
17255 if (emit)
17257 output_asm_insn ("ldr%?\t%0, %1", otherops);
17258 output_asm_insn ("ldr%?\t%0, %1", operands);
17260 if (count)
17261 *count = 2;
17264 else
17266 if (emit)
17268 output_asm_insn ("ldr%?\t%0, %1", operands);
17269 output_asm_insn ("ldr%?\t%0, %1", otherops);
17271 if (count)
17272 *count = 2;
17277 else
17279 /* Constraints should ensure this. */
17280 gcc_assert (code0 == MEM && code1 == REG);
17281 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17282 || (TARGET_ARM && TARGET_LDRD));
17284 switch (GET_CODE (XEXP (operands[0], 0)))
17286 case REG:
17287 if (emit)
17289 if (TARGET_LDRD)
17290 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17291 else
17292 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17294 break;
17296 case PRE_INC:
17297 gcc_assert (TARGET_LDRD);
17298 if (emit)
17299 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17300 break;
17302 case PRE_DEC:
17303 if (emit)
17305 if (TARGET_LDRD)
17306 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17307 else
17308 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17310 break;
17312 case POST_INC:
17313 if (emit)
17315 if (TARGET_LDRD)
17316 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17317 else
17318 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17320 break;
17322 case POST_DEC:
17323 gcc_assert (TARGET_LDRD);
17324 if (emit)
17325 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17326 break;
17328 case PRE_MODIFY:
17329 case POST_MODIFY:
17330 otherops[0] = operands[1];
17331 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17332 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17334 /* IWMMXT allows offsets larger than strd can handle,
17335 fix these up with a pair of str. */
17336 if (!TARGET_THUMB2
17337 && CONST_INT_P (otherops[2])
17338 && (INTVAL(otherops[2]) <= -256
17339 || INTVAL(otherops[2]) >= 256))
17341 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17343 if (emit)
17345 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17346 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17348 if (count)
17349 *count = 2;
17351 else
17353 if (emit)
17355 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17356 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17358 if (count)
17359 *count = 2;
17362 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17364 if (emit)
17365 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17367 else
17369 if (emit)
17370 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17372 break;
17374 case PLUS:
17375 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17376 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17378 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17380 case -8:
17381 if (emit)
17382 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17383 return "";
17385 case -4:
17386 if (TARGET_THUMB2)
17387 break;
17388 if (emit)
17389 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17390 return "";
17392 case 4:
17393 if (TARGET_THUMB2)
17394 break;
17395 if (emit)
17396 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17397 return "";
17400 if (TARGET_LDRD
17401 && (REG_P (otherops[2])
17402 || TARGET_THUMB2
17403 || (CONST_INT_P (otherops[2])
17404 && INTVAL (otherops[2]) > -256
17405 && INTVAL (otherops[2]) < 256)))
17407 otherops[0] = operands[1];
17408 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17409 if (emit)
17410 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17411 return "";
17413 /* Fall through */
17415 default:
17416 otherops[0] = adjust_address (operands[0], SImode, 4);
17417 otherops[1] = operands[1];
17418 if (emit)
17420 output_asm_insn ("str%?\t%1, %0", operands);
17421 output_asm_insn ("str%?\t%H1, %0", otherops);
17423 if (count)
17424 *count = 2;
17428 return "";
17431 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17432 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17434 const char *
17435 output_move_quad (rtx *operands)
17437 if (REG_P (operands[0]))
17439 /* Load, or reg->reg move. */
17441 if (MEM_P (operands[1]))
17443 switch (GET_CODE (XEXP (operands[1], 0)))
17445 case REG:
17446 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17447 break;
17449 case LABEL_REF:
17450 case CONST:
17451 output_asm_insn ("adr%?\t%0, %1", operands);
17452 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17453 break;
17455 default:
17456 gcc_unreachable ();
17459 else
17461 rtx ops[2];
17462 int dest, src, i;
17464 gcc_assert (REG_P (operands[1]));
17466 dest = REGNO (operands[0]);
17467 src = REGNO (operands[1]);
17469 /* This seems pretty dumb, but hopefully GCC won't try to do it
17470 very often. */
17471 if (dest < src)
17472 for (i = 0; i < 4; i++)
17474 ops[0] = gen_rtx_REG (SImode, dest + i);
17475 ops[1] = gen_rtx_REG (SImode, src + i);
17476 output_asm_insn ("mov%?\t%0, %1", ops);
17478 else
17479 for (i = 3; i >= 0; i--)
17481 ops[0] = gen_rtx_REG (SImode, dest + i);
17482 ops[1] = gen_rtx_REG (SImode, src + i);
17483 output_asm_insn ("mov%?\t%0, %1", ops);
17487 else
17489 gcc_assert (MEM_P (operands[0]));
17490 gcc_assert (REG_P (operands[1]));
17491 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17493 switch (GET_CODE (XEXP (operands[0], 0)))
17495 case REG:
17496 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17497 break;
17499 default:
17500 gcc_unreachable ();
17504 return "";
17507 /* Output a VFP load or store instruction. */
17509 const char *
17510 output_move_vfp (rtx *operands)
17512 rtx reg, mem, addr, ops[2];
17513 int load = REG_P (operands[0]);
17514 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
17515 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
17516 const char *templ;
17517 char buff[50];
17518 enum machine_mode mode;
17520 reg = operands[!load];
17521 mem = operands[load];
17523 mode = GET_MODE (reg);
17525 gcc_assert (REG_P (reg));
17526 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
17527 gcc_assert (mode == SFmode
17528 || mode == DFmode
17529 || mode == SImode
17530 || mode == DImode
17531 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
17532 gcc_assert (MEM_P (mem));
17534 addr = XEXP (mem, 0);
17536 switch (GET_CODE (addr))
17538 case PRE_DEC:
17539 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
17540 ops[0] = XEXP (addr, 0);
17541 ops[1] = reg;
17542 break;
17544 case POST_INC:
17545 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
17546 ops[0] = XEXP (addr, 0);
17547 ops[1] = reg;
17548 break;
17550 default:
17551 templ = "f%s%c%%?\t%%%s0, %%1%s";
17552 ops[0] = reg;
17553 ops[1] = mem;
17554 break;
17557 sprintf (buff, templ,
17558 load ? "ld" : "st",
17559 dp ? 'd' : 's',
17560 dp ? "P" : "",
17561 integer_p ? "\t%@ int" : "");
17562 output_asm_insn (buff, ops);
17564 return "";
17567 /* Output a Neon double-word or quad-word load or store, or a load
17568 or store for larger structure modes.
17570 WARNING: The ordering of elements is weird in big-endian mode,
17571 because the EABI requires that vectors stored in memory appear
17572 as though they were stored by a VSTM instruction.
17573 GCC RTL defines element ordering based on in-memory order.
17574 This can be different from the architectural ordering of elements
17575 within a NEON register. The intrinsics defined in arm_neon.h use the
17576 NEON register element ordering, not the GCC RTL element ordering.
17578 For example, the in-memory ordering of a big-endian quadword
17579 vector with 16-bit elements when stored from register pair {d0,d1}
17580 will be (lowest address first, d0[N] is NEON register element N):
17582 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17584 When necessary, quadword registers (dN, dN+1) are moved to ARM
17585 registers from rN in the order:
17587 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17589 So that STM/LDM can be used on vectors in ARM registers, and the
17590 same memory layout will result as if VSTM/VLDM were used.
17592 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17593 possible, which allows use of appropriate alignment tags.
17594 Note that the choice of "64" is independent of the actual vector
17595 element size; this size simply ensures that the behavior is
17596 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17598 Due to limitations of those instructions, use of VST1.64/VLD1.64
17599 is not possible if:
17600 - the address contains PRE_DEC, or
17601 - the mode refers to more than 4 double-word registers
17603 In those cases, it would be possible to replace VSTM/VLDM by a
17604 sequence of instructions; this is not currently implemented since
17605 this is not certain to actually improve performance. */
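/* As an illustration of the templates chosen below: a load through a
   POST_INC address that fits in at most 4 double-word registers uses
   "vld1.64\t%h1, %A0", whereas a PRE_DEC address forces
   "vldmdb%?\t%0!, %h1" (and the "st" variants for stores).  */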
17607 const char *
17608 output_move_neon (rtx *operands)
17610 rtx reg, mem, addr, ops[2];
17611 int regno, nregs, load = REG_P (operands[0]);
17612 const char *templ;
17613 char buff[50];
17614 enum machine_mode mode;
17616 reg = operands[!load];
17617 mem = operands[load];
17619 mode = GET_MODE (reg);
17621 gcc_assert (REG_P (reg));
17622 regno = REGNO (reg);
17623 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
17624 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
17625 || NEON_REGNO_OK_FOR_QUAD (regno));
17626 gcc_assert (VALID_NEON_DREG_MODE (mode)
17627 || VALID_NEON_QREG_MODE (mode)
17628 || VALID_NEON_STRUCT_MODE (mode));
17629 gcc_assert (MEM_P (mem));
17631 addr = XEXP (mem, 0);
17633 /* Strip off const from addresses like (const (plus (...))). */
17634 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17635 addr = XEXP (addr, 0);
17637 switch (GET_CODE (addr))
17639 case POST_INC:
17640 /* We have to use vldm / vstm for too-large modes. */
17641 if (nregs > 4)
17643 templ = "v%smia%%?\t%%0!, %%h1";
17644 ops[0] = XEXP (addr, 0);
17646 else
17648 templ = "v%s1.64\t%%h1, %%A0";
17649 ops[0] = mem;
17651 ops[1] = reg;
17652 break;
17654 case PRE_DEC:
17655 /* We have to use vldm / vstm in this case, since there is no
17656 pre-decrement form of the vld1 / vst1 instructions. */
17657 templ = "v%smdb%%?\t%%0!, %%h1";
17658 ops[0] = XEXP (addr, 0);
17659 ops[1] = reg;
17660 break;
17662 case POST_MODIFY:
17663 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17664 gcc_unreachable ();
17666 case LABEL_REF:
17667 case PLUS:
17669 int i;
17670 int overlap = -1;
17671 for (i = 0; i < nregs; i++)
17673 /* We're only using DImode here because it's a convenient size. */
17674 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
17675 ops[1] = adjust_address (mem, DImode, 8 * i);
17676 if (reg_overlap_mentioned_p (ops[0], mem))
17678 gcc_assert (overlap == -1);
17679 overlap = i;
17681 else
17683 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17684 output_asm_insn (buff, ops);
17687 if (overlap != -1)
17689 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
17690 ops[1] = adjust_address (mem, SImode, 8 * overlap);
17691 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17692 output_asm_insn (buff, ops);
17695 return "";
17698 default:
17699 /* We have to use vldm / vstm for too-large modes. */
17700 if (nregs > 4)
17701 templ = "v%smia%%?\t%%m0, %%h1";
17702 else
17703 templ = "v%s1.64\t%%h1, %%A0";
17705 ops[0] = mem;
17706 ops[1] = reg;
17709 sprintf (buff, templ, load ? "ld" : "st");
17710 output_asm_insn (buff, ops);
17712 return "";
17715 /* Compute and return the length of neon_mov<mode>, where <mode> is
17716 one of VSTRUCT modes: EI, OI, CI or XI. */
17718 arm_attr_length_move_neon (rtx insn)
17720 rtx reg, mem, addr;
17721 int load;
17722 enum machine_mode mode;
17724 extract_insn_cached (insn);
17726 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
17728 mode = GET_MODE (recog_data.operand[0]);
17729 switch (mode)
17731 case EImode:
17732 case OImode:
17733 return 8;
17734 case CImode:
17735 return 12;
17736 case XImode:
17737 return 16;
17738 default:
17739 gcc_unreachable ();
17743 load = REG_P (recog_data.operand[0]);
17744 reg = recog_data.operand[!load];
17745 mem = recog_data.operand[load];
17747 gcc_assert (MEM_P (mem));
17749 mode = GET_MODE (reg);
17750 addr = XEXP (mem, 0);
17752 /* Strip off const from addresses like (const (plus (...))). */
17753 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17754 addr = XEXP (addr, 0);
17756 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
17758 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
17759 return insns * 4;
17761 else
17762 return 4;
17765 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17766 return zero. */
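/* For example, a store whose address is [r3] or [r3, #8] returns 1 here,
   while one whose address is [r3, r4] returns 0.  */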
17769 arm_address_offset_is_imm (rtx insn)
17771 rtx mem, addr;
17773 extract_insn_cached (insn);
17775 if (REG_P (recog_data.operand[0]))
17776 return 0;
17778 mem = recog_data.operand[0];
17780 gcc_assert (MEM_P (mem));
17782 addr = XEXP (mem, 0);
17784 if (REG_P (addr)
17785 || (GET_CODE (addr) == PLUS
17786 && REG_P (XEXP (addr, 0))
17787 && CONST_INT_P (XEXP (addr, 1))))
17788 return 1;
17789 else
17790 return 0;
17793 /* Output an ADD r, s, #n where n may be too big for one instruction.
17794 If adding zero to one register, output nothing. */
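/* For example (illustrative register numbers): adding #-257 goes through
   the "sub" templates below and is emitted as "sub r0, r1, #1" followed by
   "sub r0, r0, #256", since 257 splits into the chunks 0x1 and 0x100.  */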
17795 const char *
17796 output_add_immediate (rtx *operands)
17798 HOST_WIDE_INT n = INTVAL (operands[2]);
17800 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
17802 if (n < 0)
17803 output_multi_immediate (operands,
17804 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17805 -n);
17806 else
17807 output_multi_immediate (operands,
17808 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17812 return "";
17815 /* Output a multiple immediate operation.
17816 OPERANDS is the vector of operands referred to in the output patterns.
17817 INSTR1 is the output pattern to use for the first constant.
17818 INSTR2 is the output pattern to use for subsequent constants.
17819 IMMED_OP is the index of the constant slot in OPERANDS.
17820 N is the constant value. */
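/* For instance (illustrative register numbers), with INSTR1
   "add%?\t%0, %1, %2" and N 0x00ff00ff the loop below finds the 8-bit
   chunks 0xff and 0x00ff0000 and emits "add r0, r1, #255" followed by
   "add r0, r0, #16711680".  */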
17821 static const char *
17822 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
17823 int immed_op, HOST_WIDE_INT n)
17825 #if HOST_BITS_PER_WIDE_INT > 32
17826 n &= 0xffffffff;
17827 #endif
17829 if (n == 0)
17831 /* Quick and easy output. */
17832 operands[immed_op] = const0_rtx;
17833 output_asm_insn (instr1, operands);
17835 else
17837 int i;
17838 const char * instr = instr1;
17840 /* Note that n is never zero here (which would give no output). */
17841 for (i = 0; i < 32; i += 2)
17843 if (n & (3 << i))
17845 operands[immed_op] = GEN_INT (n & (255 << i));
17846 output_asm_insn (instr, operands);
17847 instr = instr2;
17848 i += 6;
17853 return "";
17856 /* Return the name of a shifter operation. */
17857 static const char *
17858 arm_shift_nmem(enum rtx_code code)
17860 switch (code)
17862 case ASHIFT:
17863 return ARM_LSL_NAME;
17865 case ASHIFTRT:
17866 return "asr";
17868 case LSHIFTRT:
17869 return "lsr";
17871 case ROTATERT:
17872 return "ror";
17874 default:
17875 abort();
17879 /* Return the appropriate ARM instruction for the operation code.
17880 The returned result should not be overwritten. OP is the rtx of the
17881 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17882 was shifted. */
17883 const char *
17884 arithmetic_instr (rtx op, int shift_first_arg)
17886 switch (GET_CODE (op))
17888 case PLUS:
17889 return "add";
17891 case MINUS:
17892 return shift_first_arg ? "rsb" : "sub";
17894 case IOR:
17895 return "orr";
17897 case XOR:
17898 return "eor";
17900 case AND:
17901 return "and";
17903 case ASHIFT:
17904 case ASHIFTRT:
17905 case LSHIFTRT:
17906 case ROTATERT:
17907 return arm_shift_nmem(GET_CODE(op));
17909 default:
17910 gcc_unreachable ();
17914 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17915 for the operation code. The returned result should not be overwritten.
17916 OP is the rtx of the shift operation.
17917 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
17918 constant shift amount otherwise. */
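/* For example, (ashiftrt (reg) (const_int 3)) yields "asr" with *AMOUNTP
   set to 3, while (mult (reg) (const_int 8)) is treated as a left shift
   and yields ARM_LSL_NAME with *AMOUNTP set to 3.  */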
17919 static const char *
17920 shift_op (rtx op, HOST_WIDE_INT *amountp)
17922 const char * mnem;
17923 enum rtx_code code = GET_CODE (op);
17925 switch (code)
17927 case ROTATE:
17928 if (!CONST_INT_P (XEXP (op, 1)))
17930 output_operand_lossage ("invalid shift operand");
17931 return NULL;
17934 code = ROTATERT;
17935 *amountp = 32 - INTVAL (XEXP (op, 1));
17936 mnem = "ror";
17937 break;
17939 case ASHIFT:
17940 case ASHIFTRT:
17941 case LSHIFTRT:
17942 case ROTATERT:
17943 mnem = arm_shift_nmem(code);
17944 if (CONST_INT_P (XEXP (op, 1)))
17946 *amountp = INTVAL (XEXP (op, 1));
17948 else if (REG_P (XEXP (op, 1)))
17950 *amountp = -1;
17951 return mnem;
17953 else
17955 output_operand_lossage ("invalid shift operand");
17956 return NULL;
17958 break;
17960 case MULT:
17961 /* We never have to worry about the amount being other than a
17962 power of 2, since this case can never be reloaded from a reg. */
17963 if (!CONST_INT_P (XEXP (op, 1)))
17965 output_operand_lossage ("invalid shift operand");
17966 return NULL;
17969 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
17971 /* Amount must be a power of two. */
17972 if (*amountp & (*amountp - 1))
17974 output_operand_lossage ("invalid shift operand");
17975 return NULL;
17978 *amountp = int_log2 (*amountp);
17979 return ARM_LSL_NAME;
17981 default:
17982 output_operand_lossage ("invalid shift operand");
17983 return NULL;
17986 /* This is not 100% correct, but follows from the desire to merge
17987 multiplication by a power of 2 with the recognizer for a
17988 shift. >=32 is not a valid shift for "lsl", so we must try to
17989 output a shift that produces the correct arithmetical result.
17990 Using lsr #32 is identical except for the fact that the carry bit
17991 is not set correctly if we set the flags; but we never use the
17992 carry bit from such an operation, so we can ignore that. */
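/* For example, an ASHIFT by 40 is emitted as "lsr" with an amount of 32,
   which also yields zero.  */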
17993 if (code == ROTATERT)
17994 /* Rotate is just modulo 32. */
17995 *amountp &= 31;
17996 else if (*amountp != (*amountp & 31))
17998 if (code == ASHIFT)
17999 mnem = "lsr";
18000 *amountp = 32;
18003 /* Shifts of 0 are no-ops. */
18004 if (*amountp == 0)
18005 return NULL;
18007 return mnem;
18010 /* Obtain the shift from the POWER of two. */
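/* For example, int_log2 (8) returns 3.  POWER must have at least one of
   its low 32 bits set, otherwise the assertion below eventually fires.  */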
18012 static HOST_WIDE_INT
18013 int_log2 (HOST_WIDE_INT power)
18015 HOST_WIDE_INT shift = 0;
18017 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18019 gcc_assert (shift <= 31);
18020 shift++;
18023 return shift;
18026 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18027 because /bin/as is horribly restrictive. The judgement about
18028 whether or not each character is 'printable' (and can be output as
18029 is) or not (and must be printed with an octal escape) must be made
18030 with reference to the *host* character set -- the situation is
18031 similar to that discussed in the comments above pp_c_char in
18032 c-pretty-print.c. */
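/* For example, the three-character string a"b is emitted as
	.ascii "a\"b"
   and anything longer than MAX_ASCII_LEN characters is split across
   several .ascii directives.  */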
18034 #define MAX_ASCII_LEN 51
18036 void
18037 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18039 int i;
18040 int len_so_far = 0;
18042 fputs ("\t.ascii\t\"", stream);
18044 for (i = 0; i < len; i++)
18046 int c = p[i];
18048 if (len_so_far >= MAX_ASCII_LEN)
18050 fputs ("\"\n\t.ascii\t\"", stream);
18051 len_so_far = 0;
18054 if (ISPRINT (c))
18056 if (c == '\\' || c == '\"')
18058 putc ('\\', stream);
18059 len_so_far++;
18061 putc (c, stream);
18062 len_so_far++;
18064 else
18066 fprintf (stream, "\\%03o", c);
18067 len_so_far += 4;
18071 fputs ("\"\n", stream);
18074 /* Compute the register save mask for registers 0 through 12
18075 inclusive. This code is used by arm_compute_save_reg_mask. */
18077 static unsigned long
18078 arm_compute_save_reg0_reg12_mask (void)
18080 unsigned long func_type = arm_current_func_type ();
18081 unsigned long save_reg_mask = 0;
18082 unsigned int reg;
18084 if (IS_INTERRUPT (func_type))
18086 unsigned int max_reg;
18087 /* Interrupt functions must not corrupt any registers,
18088 even call clobbered ones. If this is a leaf function
18089 we can just examine the registers used by the RTL, but
18090 otherwise we have to assume that whatever function is
18091 called might clobber anything, and so we have to save
18092 all the call-clobbered registers as well. */
18093 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18094 /* FIQ handlers have registers r8 - r12 banked, so
18095 we only need to check r0 - r7. Normal ISRs only
18096 bank r14 and r15, so we must check up to r12.
18097 r13 is the stack pointer which is always preserved,
18098 so we do not need to consider it here. */
18099 max_reg = 7;
18100 else
18101 max_reg = 12;
18103 for (reg = 0; reg <= max_reg; reg++)
18104 if (df_regs_ever_live_p (reg)
18105 || (! crtl->is_leaf && call_used_regs[reg]))
18106 save_reg_mask |= (1 << reg);
18108 /* Also save the pic base register if necessary. */
18109 if (flag_pic
18110 && !TARGET_SINGLE_PIC_BASE
18111 && arm_pic_register != INVALID_REGNUM
18112 && crtl->uses_pic_offset_table)
18113 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18115 else if (IS_VOLATILE(func_type))
18117 /* For noreturn functions we historically omitted register saves
18118 altogether. However this really messes up debugging. As a
18119 compromise save just the frame pointers. Combined with the link
18120 register saved elsewhere this should be sufficient to get
18121 a backtrace. */
18122 if (frame_pointer_needed)
18123 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18124 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18125 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18126 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18127 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18129 else
18131 /* In the normal case we only need to save those registers
18132 which are call saved and which are used by this function. */
18133 for (reg = 0; reg <= 11; reg++)
18134 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18135 save_reg_mask |= (1 << reg);
18137 /* Handle the frame pointer as a special case. */
18138 if (frame_pointer_needed)
18139 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18141 /* If we aren't loading the PIC register,
18142 don't stack it even though it may be live. */
18143 if (flag_pic
18144 && !TARGET_SINGLE_PIC_BASE
18145 && arm_pic_register != INVALID_REGNUM
18146 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18147 || crtl->uses_pic_offset_table))
18148 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18150 /* The prologue will copy SP into R0, so save it. */
18151 if (IS_STACKALIGN (func_type))
18152 save_reg_mask |= 1;
18155 /* Save registers so the exception handler can modify them. */
18156 if (crtl->calls_eh_return)
18158 unsigned int i;
18160 for (i = 0; ; i++)
18162 reg = EH_RETURN_DATA_REGNO (i);
18163 if (reg == INVALID_REGNUM)
18164 break;
18165 save_reg_mask |= 1 << reg;
18169 return save_reg_mask;
18172 /* Return true if r3 is live at the start of the function. */
18174 static bool
18175 arm_r3_live_at_start_p (void)
18177 /* Just look at cfg info, which is still close enough to correct at this
18178 point. This gives false positives for broken functions that might use
18179 uninitialized data that happens to be allocated in r3, but who cares? */
18180 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3);
18183 /* Compute the number of bytes used to store the static chain register on the
18184 stack, above the stack frame. We need to know this accurately to get the
18185 alignment of the rest of the stack frame correct. */
18187 static int
18188 arm_compute_static_chain_stack_bytes (void)
18190 /* See the defining assertion in arm_expand_prologue. */
18191 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18192 && IS_NESTED (arm_current_func_type ())
18193 && arm_r3_live_at_start_p ()
18194 && crtl->args.pretend_args_size == 0)
18195 return 4;
18197 return 0;
18200 /* Compute a bit mask of which registers need to be
18201 saved on the stack for the current function.
18202 This is used by arm_get_frame_offsets, which may add extra registers. */
18204 static unsigned long
18205 arm_compute_save_reg_mask (void)
18207 unsigned int save_reg_mask = 0;
18208 unsigned long func_type = arm_current_func_type ();
18209 unsigned int reg;
18211 if (IS_NAKED (func_type))
18212 /* This should never really happen. */
18213 return 0;
18215 /* If we are creating a stack frame, then we must save the frame pointer,
18216 IP (which will hold the old stack pointer), LR and the PC. */
18217 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18218 save_reg_mask |=
18219 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18220 | (1 << IP_REGNUM)
18221 | (1 << LR_REGNUM)
18222 | (1 << PC_REGNUM);
18224 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18226 /* Decide if we need to save the link register.
18227 Interrupt routines have their own banked link register,
18228 so they never need to save it.
18229 Otherwise if we do not use the link register we do not need to save
18230 it. If we are pushing other registers onto the stack however, we
18231 can save an instruction in the epilogue by pushing the link register
18232 now and then popping it back into the PC. This incurs extra memory
18233 accesses though, so we only do it when optimizing for size, and only
18234 if we know that we will not need a fancy return sequence. */
18235 if (df_regs_ever_live_p (LR_REGNUM)
18236 || (save_reg_mask
18237 && optimize_size
18238 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18239 && !crtl->calls_eh_return))
18240 save_reg_mask |= 1 << LR_REGNUM;
18242 if (cfun->machine->lr_save_eliminated)
18243 save_reg_mask &= ~ (1 << LR_REGNUM);
18245 if (TARGET_REALLY_IWMMXT
18246 && ((bit_count (save_reg_mask)
18247 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18248 arm_compute_static_chain_stack_bytes())
18249 ) % 2) != 0)
18251 /* The total number of registers that are going to be pushed
18252 onto the stack is odd. We need to ensure that the stack
18253 is 64-bit aligned before we start to save iWMMXt registers,
18254 and also before we start to create locals. (A local variable
18255 might be a double or long long which we will load/store using
18256 an iWMMXt instruction). Therefore we need to push another
18257 ARM register, so that the stack will be 64-bit aligned. We
18258 try to avoid using the arg registers (r0 - r3) as they might be
18259 used to pass values in a tail call. */
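/* For example, if only r4, r5 and r6 would otherwise be pushed and there
   are no pretend args, the count is odd, so r7 (the first suitable free
   register) is pushed as well to keep the stack 64-bit aligned.  */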
18260 for (reg = 4; reg <= 12; reg++)
18261 if ((save_reg_mask & (1 << reg)) == 0)
18262 break;
18264 if (reg <= 12)
18265 save_reg_mask |= (1 << reg);
18266 else
18268 cfun->machine->sibcall_blocked = 1;
18269 save_reg_mask |= (1 << 3);
18273 /* We may need to push an additional register for use initializing the
18274 PIC base register. */
18275 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18276 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18278 reg = thumb_find_work_register (1 << 4);
18279 if (!call_used_regs[reg])
18280 save_reg_mask |= (1 << reg);
18283 return save_reg_mask;
18287 /* Compute a bit mask of which registers need to be
18288 saved on the stack for the current function. */
18289 static unsigned long
18290 thumb1_compute_save_reg_mask (void)
18292 unsigned long mask;
18293 unsigned reg;
18295 mask = 0;
18296 for (reg = 0; reg < 12; reg ++)
18297 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18298 mask |= 1 << reg;
18300 if (flag_pic
18301 && !TARGET_SINGLE_PIC_BASE
18302 && arm_pic_register != INVALID_REGNUM
18303 && crtl->uses_pic_offset_table)
18304 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18306 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18307 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18308 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18310 /* LR will also be pushed if any lo regs are pushed. */
18311 if (mask & 0xff || thumb_force_lr_save ())
18312 mask |= (1 << LR_REGNUM);
18314 /* Make sure we have a low work register if we need one.
18315 We will need one if we are going to push a high register,
18316 but we are not currently intending to push a low register. */
18317 if ((mask & 0xff) == 0
18318 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18320 /* Use thumb_find_work_register to choose which register
18321 we will use. If the register is live then we will
18322 have to push it. Use LAST_LO_REGNUM as our fallback
18323 choice for the register to select. */
18324 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18325 /* Make sure the register returned by thumb_find_work_register is
18326 not part of the return value. */
18327 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18328 reg = LAST_LO_REGNUM;
18330 if (! call_used_regs[reg])
18331 mask |= 1 << reg;
18334 /* The 504 below is 8 bytes less than 512 because there are two possible
18335 alignment words. We can't tell here if they will be present or not so we
18336 have to play it safe and assume that they are. */
18337 if ((CALLER_INTERWORKING_SLOT_SIZE +
18338 ROUND_UP_WORD (get_frame_size ()) +
18339 crtl->outgoing_args_size) >= 504)
18341 /* This is the same as the code in thumb1_expand_prologue() which
18342 determines which register to use for stack decrement. */
18343 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18344 if (mask & (1 << reg))
18345 break;
18347 if (reg > LAST_LO_REGNUM)
18349 /* Make sure we have a register available for stack decrement. */
18350 mask |= 1 << LAST_LO_REGNUM;
18354 return mask;
18358 /* Return the number of bytes required to save VFP registers. */
18359 static int
18360 arm_get_vfp_saved_size (void)
18362 unsigned int regno;
18363 int count;
18364 int saved;
18366 saved = 0;
18367 /* Space for saved VFP registers. */
18368 if (TARGET_HARD_FLOAT && TARGET_VFP)
18370 count = 0;
18371 for (regno = FIRST_VFP_REGNUM;
18372 regno < LAST_VFP_REGNUM;
18373 regno += 2)
18375 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18376 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18378 if (count > 0)
18380 /* Workaround ARM10 VFPr1 bug. */
18381 if (count == 2 && !arm_arch6)
18382 count++;
18383 saved += count * 8;
18385 count = 0;
18387 else
18388 count++;
18390 if (count > 0)
18392 if (count == 2 && !arm_arch6)
18393 count++;
18394 saved += count * 8;
18397 return saved;
18401 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18402 everything bar the final return instruction. If simple_return is true,
18403 then do not output the epilogue, because it has already been emitted in RTL. */
18404 const char *
18405 output_return_instruction (rtx operand, bool really_return, bool reverse,
18406 bool simple_return)
18408 char conditional[10];
18409 char instr[100];
18410 unsigned reg;
18411 unsigned long live_regs_mask;
18412 unsigned long func_type;
18413 arm_stack_offsets *offsets;
18415 func_type = arm_current_func_type ();
18417 if (IS_NAKED (func_type))
18418 return "";
18420 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18422 /* If this function was declared non-returning, and we have
18423 found a tail call, then we have to trust that the called
18424 function won't return. */
18425 if (really_return)
18427 rtx ops[2];
18429 /* Otherwise, trap an attempted return by aborting. */
18430 ops[0] = operand;
18431 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18432 : "abort");
18433 assemble_external_libcall (ops[1]);
18434 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18437 return "";
18440 gcc_assert (!cfun->calls_alloca || really_return);
18442 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18444 cfun->machine->return_used_this_function = 1;
18446 offsets = arm_get_frame_offsets ();
18447 live_regs_mask = offsets->saved_regs_mask;
18449 if (!simple_return && live_regs_mask)
18451 const char * return_reg;
18453 /* If we do not have any special requirements for function exit
18454 (e.g. interworking) then we can load the return address
18455 directly into the PC. Otherwise we must load it into LR. */
18456 if (really_return
18457 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18458 return_reg = reg_names[PC_REGNUM];
18459 else
18460 return_reg = reg_names[LR_REGNUM];
18462 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18464 /* There are three possible reasons for the IP register
18465 being saved: 1) a stack frame was created, in which case
18466 IP contains the old stack pointer, or 2) an ISR routine
18467 corrupted it, or 3) it was saved to align the stack on
18468 iWMMXt. In case 1, restore IP into SP, otherwise just
18469 restore IP. */
18470 if (frame_pointer_needed)
18472 live_regs_mask &= ~ (1 << IP_REGNUM);
18473 live_regs_mask |= (1 << SP_REGNUM);
18475 else
18476 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18479 /* On some ARM architectures it is faster to use LDR rather than
18480 LDM to load a single register. On other architectures, the
18481 cost is the same. In 26 bit mode, or for exception handlers,
18482 we have to use LDM to load the PC so that the CPSR is also
18483 restored. */
18484 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18485 if (live_regs_mask == (1U << reg))
18486 break;
18488 if (reg <= LAST_ARM_REGNUM
18489 && (reg != LR_REGNUM
18490 || ! really_return
18491 || ! IS_INTERRUPT (func_type)))
18493 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18494 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18496 else
18498 char *p;
18499 int first = 1;
18501 /* Generate the load multiple instruction to restore the
18502 registers. Note we can get here, even if
18503 frame_pointer_needed is true, but only if sp already
18504 points to the base of the saved core registers. */
18505 if (live_regs_mask & (1 << SP_REGNUM))
18507 unsigned HOST_WIDE_INT stack_adjust;
18509 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
18510 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
18512 if (stack_adjust && arm_arch5 && TARGET_ARM)
18513 if (TARGET_UNIFIED_ASM)
18514 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
18515 else
18516 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
18517 else
18519 /* If we can't use ldmib (SA110 bug),
18520 then try to pop r3 instead. */
18521 if (stack_adjust)
18522 live_regs_mask |= 1 << 3;
18524 if (TARGET_UNIFIED_ASM)
18525 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
18526 else
18527 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
18530 else
18531 if (TARGET_UNIFIED_ASM)
18532 sprintf (instr, "pop%s\t{", conditional);
18533 else
18534 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
18536 p = instr + strlen (instr);
18538 for (reg = 0; reg <= SP_REGNUM; reg++)
18539 if (live_regs_mask & (1 << reg))
18541 int l = strlen (reg_names[reg]);
18543 if (first)
18544 first = 0;
18545 else
18547 memcpy (p, ", ", 2);
18548 p += 2;
18551 memcpy (p, "%|", 2);
18552 memcpy (p + 2, reg_names[reg], l);
18553 p += l + 2;
18556 if (live_regs_mask & (1 << LR_REGNUM))
18558 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
18559 /* If returning from an interrupt, restore the CPSR. */
18560 if (IS_INTERRUPT (func_type))
18561 strcat (p, "^");
18563 else
18564 strcpy (p, "}");
18567 output_asm_insn (instr, & operand);
18569 /* See if we need to generate an extra instruction to
18570 perform the actual function return. */
18571 if (really_return
18572 && func_type != ARM_FT_INTERWORKED
18573 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
18575 /* The return has already been handled
18576 by loading the LR into the PC. */
18577 return "";
18581 if (really_return)
18583 switch ((int) ARM_FUNC_TYPE (func_type))
18585 case ARM_FT_ISR:
18586 case ARM_FT_FIQ:
18587 /* ??? This is wrong for unified assembly syntax. */
18588 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
18589 break;
18591 case ARM_FT_INTERWORKED:
18592 sprintf (instr, "bx%s\t%%|lr", conditional);
18593 break;
18595 case ARM_FT_EXCEPTION:
18596 /* ??? This is wrong for unified assembly syntax. */
18597 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
18598 break;
18600 default:
18601 /* Use bx if it's available. */
18602 if (arm_arch5 || arm_arch4t)
18603 sprintf (instr, "bx%s\t%%|lr", conditional);
18604 else
18605 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
18606 break;
18609 output_asm_insn (instr, & operand);
18612 return "";
18615 /* Write the function name into the code section, directly preceding
18616 the function prologue.
18618 Code will be output similar to this:
18620 .ascii "arm_poke_function_name", 0
18621 .align
18623 .word 0xff000000 + (t1 - t0)
18624 arm_poke_function_name
18625 mov ip, sp
18626 stmfd sp!, {fp, ip, lr, pc}
18627 sub fp, ip, #4
18629 When performing a stack backtrace, code can inspect the value
18630 of 'pc' stored at 'fp' + 0. If the trace function then looks
18631 at location pc - 12 and the top 8 bits are set, then we know
18632 that there is a function name embedded immediately preceding this
18633 location, whose length is ((pc[-3]) & ~0xff000000).
18635 We assume that pc is declared as a pointer to an unsigned long.
18637 It is of no benefit to output the function name if we are assembling
18638 a leaf function. These function types will not contain a stack
18639 backtrace structure, therefore it is not possible to determine the
18640 function name. */
18641 void
18642 arm_poke_function_name (FILE *stream, const char *name)
18644 unsigned long alignlength;
18645 unsigned long length;
18646 rtx x;
18648 length = strlen (name) + 1;
18649 alignlength = ROUND_UP_WORD (length);
18651 ASM_OUTPUT_ASCII (stream, name, length);
18652 ASM_OUTPUT_ALIGN (stream, 2);
18653 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
18654 assemble_aligned_integer (UNITS_PER_WORD, x);
18657 /* Place some comments into the assembler stream
18658 describing the current function. */
18659 static void
18660 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
18662 unsigned long func_type;
18664 /* ??? Do we want to print some of the below anyway? */
18665 if (TARGET_THUMB1)
18666 return;
18668 /* Sanity check. */
18669 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
18671 func_type = arm_current_func_type ();
18673 switch ((int) ARM_FUNC_TYPE (func_type))
18675 default:
18676 case ARM_FT_NORMAL:
18677 break;
18678 case ARM_FT_INTERWORKED:
18679 asm_fprintf (f, "\t%@ Function supports interworking.\n");
18680 break;
18681 case ARM_FT_ISR:
18682 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
18683 break;
18684 case ARM_FT_FIQ:
18685 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
18686 break;
18687 case ARM_FT_EXCEPTION:
18688 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
18689 break;
18692 if (IS_NAKED (func_type))
18693 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18695 if (IS_VOLATILE (func_type))
18696 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
18698 if (IS_NESTED (func_type))
18699 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
18700 if (IS_STACKALIGN (func_type))
18701 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18703 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18704 crtl->args.size,
18705 crtl->args.pretend_args_size, frame_size);
18707 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18708 frame_pointer_needed,
18709 cfun->machine->uses_anonymous_args);
18711 if (cfun->machine->lr_save_eliminated)
18712 asm_fprintf (f, "\t%@ link register save eliminated.\n");
18714 if (crtl->calls_eh_return)
18715 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
18719 static void
18720 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
18721 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
18723 arm_stack_offsets *offsets;
18725 if (TARGET_THUMB1)
18727 int regno;
18729 /* Emit any call-via-reg trampolines that are needed for v4t support
18730 of call_reg and call_value_reg type insns. */
18731 for (regno = 0; regno < LR_REGNUM; regno++)
18733 rtx label = cfun->machine->call_via[regno];
18735 if (label != NULL)
18737 switch_to_section (function_section (current_function_decl));
18738 targetm.asm_out.internal_label (asm_out_file, "L",
18739 CODE_LABEL_NUMBER (label));
18740 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18744 /* ??? Probably not safe to set this here, since it assumes that a
18745 function will be emitted as assembly immediately after we generate
18746 RTL for it. This does not happen for inline functions. */
18747 cfun->machine->return_used_this_function = 0;
18749 else /* TARGET_32BIT */
18751 /* We need to take into account any stack-frame rounding. */
18752 offsets = arm_get_frame_offsets ();
18754 gcc_assert (!use_return_insn (FALSE, NULL)
18755 || (cfun->machine->return_used_this_function != 0)
18756 || offsets->saved_regs == offsets->outgoing_args
18757 || frame_pointer_needed);
18759 /* Reset the ARM-specific per-function variables. */
18760 after_arm_reorg = 0;
18764 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18765 STR and STRD. If an even number of registers is being pushed, one
18766 or more STRD patterns are created for each register pair. If an
18767 odd number of registers is pushed, emit an initial STR followed by
18768 as many STRD instructions as are needed. This works best when the
18769 stack is initially 64-bit aligned (the normal case), since it
18770 ensures that each STRD is also 64-bit aligned. */
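/* For instance, pushing {r4, r5, r6} (an odd count) is emitted roughly as
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   so that the STRD itself remains doubleword aligned.  */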
18771 static void
18772 thumb2_emit_strd_push (unsigned long saved_regs_mask)
18774 int num_regs = 0;
18775 int i;
18776 int regno;
18777 rtx par = NULL_RTX;
18778 rtx dwarf = NULL_RTX;
18779 rtx tmp;
18780 bool first = true;
18782 num_regs = bit_count (saved_regs_mask);
18784 /* Must be at least one register to save, and can't save SP or PC. */
18785 gcc_assert (num_regs > 0 && num_regs <= 14);
18786 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18787 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18789 /* Create sequence for DWARF info. All the frame-related data for
18790 debugging is held in this wrapper. */
18791 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18793 /* Describe the stack adjustment. */
18794 tmp = gen_rtx_SET (VOIDmode,
18795 stack_pointer_rtx,
18796 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18797 RTX_FRAME_RELATED_P (tmp) = 1;
18798 XVECEXP (dwarf, 0, 0) = tmp;
18800 /* Find the first register. */
18801 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
18804 i = 0;
18806 /* If there's an odd number of registers to push, start off by
18807 pushing a single register. This ensures that subsequent strd
18808 operations are dword aligned (assuming that SP was originally
18809 64-bit aligned). */
18810 if ((num_regs & 1) != 0)
18812 rtx reg, mem, insn;
18814 reg = gen_rtx_REG (SImode, regno);
18815 if (num_regs == 1)
18816 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
18817 stack_pointer_rtx));
18818 else
18819 mem = gen_frame_mem (Pmode,
18820 gen_rtx_PRE_MODIFY
18821 (Pmode, stack_pointer_rtx,
18822 plus_constant (Pmode, stack_pointer_rtx,
18823 -4 * num_regs)));
18825 tmp = gen_rtx_SET (VOIDmode, mem, reg);
18826 RTX_FRAME_RELATED_P (tmp) = 1;
18827 insn = emit_insn (tmp);
18828 RTX_FRAME_RELATED_P (insn) = 1;
18829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18830 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
18831 reg);
18832 RTX_FRAME_RELATED_P (tmp) = 1;
18833 i++;
18834 regno++;
18835 XVECEXP (dwarf, 0, i) = tmp;
18836 first = false;
18839 while (i < num_regs)
18840 if (saved_regs_mask & (1 << regno))
18842 rtx reg1, reg2, mem1, mem2;
18843 rtx tmp0, tmp1, tmp2;
18844 int regno2;
18846 /* Find the register to pair with this one. */
18847 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
18848 regno2++)
18851 reg1 = gen_rtx_REG (SImode, regno);
18852 reg2 = gen_rtx_REG (SImode, regno2);
18854 if (first)
18856 rtx insn;
18858 first = false;
18859 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18860 stack_pointer_rtx,
18861 -4 * num_regs));
18862 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18863 stack_pointer_rtx,
18864 -4 * (num_regs - 1)));
18865 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18866 plus_constant (Pmode, stack_pointer_rtx,
18867 -4 * (num_regs)));
18868 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
18869 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
18870 RTX_FRAME_RELATED_P (tmp0) = 1;
18871 RTX_FRAME_RELATED_P (tmp1) = 1;
18872 RTX_FRAME_RELATED_P (tmp2) = 1;
18873 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
18874 XVECEXP (par, 0, 0) = tmp0;
18875 XVECEXP (par, 0, 1) = tmp1;
18876 XVECEXP (par, 0, 2) = tmp2;
18877 insn = emit_insn (par);
18878 RTX_FRAME_RELATED_P (insn) = 1;
18879 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18881 else
18883 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
18884 stack_pointer_rtx,
18885 4 * i));
18886 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
18887 stack_pointer_rtx,
18888 4 * (i + 1)));
18889 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
18890 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
18891 RTX_FRAME_RELATED_P (tmp1) = 1;
18892 RTX_FRAME_RELATED_P (tmp2) = 1;
18893 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
18894 XVECEXP (par, 0, 0) = tmp1;
18895 XVECEXP (par, 0, 1) = tmp2;
18896 emit_insn (par);
18899 /* Create unwind information. This is an approximation. */
18900 tmp1 = gen_rtx_SET (VOIDmode,
18901 gen_frame_mem (Pmode,
18902 plus_constant (Pmode,
18903 stack_pointer_rtx,
18904 4 * i)),
18905 reg1);
18906 tmp2 = gen_rtx_SET (VOIDmode,
18907 gen_frame_mem (Pmode,
18908 plus_constant (Pmode,
18909 stack_pointer_rtx,
18910 4 * (i + 1))),
18911 reg2);
18913 RTX_FRAME_RELATED_P (tmp1) = 1;
18914 RTX_FRAME_RELATED_P (tmp2) = 1;
18915 XVECEXP (dwarf, 0, i + 1) = tmp1;
18916 XVECEXP (dwarf, 0, i + 2) = tmp2;
18917 i += 2;
18918 regno = regno2 + 1;
18920 else
18921 regno++;
18923 return;
18926 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18927 whenever possible, otherwise it emits single-word stores. The first store
18928 also allocates stack space for all saved registers, using writeback with
18929 post-addressing mode. All other stores use offset addressing. If no STRD
18930 can be emitted, this function emits a sequence of single-word stores,
18931 and not an STM as before, because single-word stores provide more
18932 scheduling freedom and can be turned into an STM by peephole optimizations. */
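/* For instance, pushing {r4, r5, r7} is emitted roughly as
	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
   i.e. the first store allocates the whole 12-byte area with writeback and
   the remaining store uses offset addressing.  */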
18933 static void
18934 arm_emit_strd_push (unsigned long saved_regs_mask)
18936 int num_regs = 0;
18937 int i, j, dwarf_index = 0;
18938 int offset = 0;
18939 rtx dwarf = NULL_RTX;
18940 rtx insn = NULL_RTX;
18941 rtx tmp, mem;
18943 /* TODO: More efficient code can be emitted by changing the
18944 layout, e.g., first push all pairs that can use STRD to keep the
18945 stack aligned, and then push all other registers. */
18946 for (i = 0; i <= LAST_ARM_REGNUM; i++)
18947 if (saved_regs_mask & (1 << i))
18948 num_regs++;
18950 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18951 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
18952 gcc_assert (num_regs > 0);
18954 /* Create sequence for DWARF info. */
18955 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
18957 /* For dwarf info, we generate explicit stack update. */
18958 tmp = gen_rtx_SET (VOIDmode,
18959 stack_pointer_rtx,
18960 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
18961 RTX_FRAME_RELATED_P (tmp) = 1;
18962 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
18964 /* Save registers. */
18965 offset = - 4 * num_regs;
18966 j = 0;
18967 while (j <= LAST_ARM_REGNUM)
18968 if (saved_regs_mask & (1 << j))
18970 if ((j % 2 == 0)
18971 && (saved_regs_mask & (1 << (j + 1))))
18973 /* Current register and previous register form register pair for
18974 which STRD can be generated. */
18975 if (offset < 0)
18977 /* Allocate stack space for all saved registers. */
18978 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
18979 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
18980 mem = gen_frame_mem (DImode, tmp);
18981 offset = 0;
18983 else if (offset > 0)
18984 mem = gen_frame_mem (DImode,
18985 plus_constant (Pmode,
18986 stack_pointer_rtx,
18987 offset));
18988 else
18989 mem = gen_frame_mem (DImode, stack_pointer_rtx);
18991 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
18992 RTX_FRAME_RELATED_P (tmp) = 1;
18993 tmp = emit_insn (tmp);
18995 /* Record the first store insn. */
18996 if (dwarf_index == 1)
18997 insn = tmp;
18999 /* Generate dwarf info. */
19000 mem = gen_frame_mem (SImode,
19001 plus_constant (Pmode,
19002 stack_pointer_rtx,
19003 offset));
19004 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19005 RTX_FRAME_RELATED_P (tmp) = 1;
19006 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19008 mem = gen_frame_mem (SImode,
19009 plus_constant (Pmode,
19010 stack_pointer_rtx,
19011 offset + 4));
19012 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19013 RTX_FRAME_RELATED_P (tmp) = 1;
19014 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19016 offset += 8;
19017 j += 2;
19019 else
19021 /* Emit a single word store. */
19022 if (offset < 0)
19024 /* Allocate stack space for all saved registers. */
19025 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19026 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19027 mem = gen_frame_mem (SImode, tmp);
19028 offset = 0;
19030 else if (offset > 0)
19031 mem = gen_frame_mem (SImode,
19032 plus_constant (Pmode,
19033 stack_pointer_rtx,
19034 offset));
19035 else
19036 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19038 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19039 RTX_FRAME_RELATED_P (tmp) = 1;
19040 tmp = emit_insn (tmp);
19042 /* Record the first store insn. */
19043 if (dwarf_index == 1)
19044 insn = tmp;
19046 /* Generate dwarf info. */
19047 mem = gen_frame_mem (SImode,
19048 plus_constant(Pmode,
19049 stack_pointer_rtx,
19050 offset));
19051 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19052 RTX_FRAME_RELATED_P (tmp) = 1;
19053 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19055 offset += 4;
19056 j += 1;
19059 else
19060 j++;
19062 /* Attach dwarf info to the first insn we generate. */
19063 gcc_assert (insn != NULL_RTX);
19064 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19065 RTX_FRAME_RELATED_P (insn) = 1;
19068 /* Generate and emit an insn that we will recognize as a push_multi.
19069 Unfortunately, since this insn does not reflect very well the actual
19070 semantics of the operation, we need to annotate the insn for the benefit
19071 of DWARF2 frame unwind information. */
19072 static rtx
19073 emit_multi_reg_push (unsigned long mask)
19075 int num_regs = 0;
19076 int num_dwarf_regs;
19077 int i, j;
19078 rtx par;
19079 rtx dwarf;
19080 int dwarf_par_index;
19081 rtx tmp, reg;
19083 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19084 if (mask & (1 << i))
19085 num_regs++;
19087 gcc_assert (num_regs && num_regs <= 16);
19089 /* We don't record the PC in the dwarf frame information. */
19090 num_dwarf_regs = num_regs;
19091 if (mask & (1 << PC_REGNUM))
19092 num_dwarf_regs--;
19094 /* For the body of the insn we are going to generate an UNSPEC in
19095 parallel with several USEs. This allows the insn to be recognized
19096 by the push_multi pattern in the arm.md file.
19098 The body of the insn looks something like this:
19100 (parallel [
19101 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19102 (const_int:SI <num>)))
19103 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19104 (use (reg:SI XX))
19105 (use (reg:SI YY))
19109 For the frame note however, we try to be more explicit and actually
19110 show each register being stored into the stack frame, plus a (single)
19111 decrement of the stack pointer. We do it this way in order to be
19112 friendly to the stack unwinding code, which only wants to see a single
19113 stack decrement per instruction. The RTL we generate for the note looks
19114 something like this:
19116 (sequence [
19117 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19118 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19119 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19120 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19124 FIXME: In an ideal world the PRE_MODIFY would not exist and
19125 instead we'd have a parallel expression detailing all
19126 the stores to the various memory addresses so that debug
19127 information is more up-to-date. Remember however while writing
19128 this to take care of the constraints with the push instruction.
19130 Note also that this has to be taken care of for the VFP registers.
19132 For more see PR43399. */
19134 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19135 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19136 dwarf_par_index = 1;
19138 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19140 if (mask & (1 << i))
19142 reg = gen_rtx_REG (SImode, i);
19144 XVECEXP (par, 0, 0)
19145 = gen_rtx_SET (VOIDmode,
19146 gen_frame_mem
19147 (BLKmode,
19148 gen_rtx_PRE_MODIFY (Pmode,
19149 stack_pointer_rtx,
19150 plus_constant
19151 (Pmode, stack_pointer_rtx,
19152 -4 * num_regs))
19154 gen_rtx_UNSPEC (BLKmode,
19155 gen_rtvec (1, reg),
19156 UNSPEC_PUSH_MULT));
19158 if (i != PC_REGNUM)
19160 tmp = gen_rtx_SET (VOIDmode,
19161 gen_frame_mem (SImode, stack_pointer_rtx),
19162 reg);
19163 RTX_FRAME_RELATED_P (tmp) = 1;
19164 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19165 dwarf_par_index++;
19168 break;
19172 for (j = 1, i++; j < num_regs; i++)
19174 if (mask & (1 << i))
19176 reg = gen_rtx_REG (SImode, i);
19178 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19180 if (i != PC_REGNUM)
19183 = gen_rtx_SET (VOIDmode,
19184 gen_frame_mem
19185 (SImode,
19186 plus_constant (Pmode, stack_pointer_rtx,
19187 4 * j)),
19188 reg);
19189 RTX_FRAME_RELATED_P (tmp) = 1;
19190 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19193 j++;
19197 par = emit_insn (par);
19199 tmp = gen_rtx_SET (VOIDmode,
19200 stack_pointer_rtx,
19201 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19202 RTX_FRAME_RELATED_P (tmp) = 1;
19203 XVECEXP (dwarf, 0, 0) = tmp;
19205 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19207 return par;
19210 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19211 SIZE is the offset to be adjusted.
19212 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19213 static void
19214 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19216 rtx dwarf;
19218 RTX_FRAME_RELATED_P (insn) = 1;
19219 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19220 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19223 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19224 SAVED_REGS_MASK shows which registers need to be restored.
19226 Unfortunately, since this insn does not reflect very well the actual
19227 semantics of the operation, we need to annotate the insn for the benefit
19228 of DWARF2 frame unwind information. */
19229 static void
19230 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19232 int num_regs = 0;
19233 int i, j;
19234 rtx par;
19235 rtx dwarf = NULL_RTX;
19236 rtx tmp, reg;
19237 bool return_in_pc;
19238 int offset_adj;
19239 int emit_update;
19241 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19242 offset_adj = return_in_pc ? 1 : 0;
19243 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19244 if (saved_regs_mask & (1 << i))
19245 num_regs++;
19247 gcc_assert (num_regs && num_regs <= 16);
19249 /* If SP is in the reglist, then we don't emit the SP update insn. */
19250 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19252 /* The parallel needs to hold num_regs SETs
19253 and one SET for the stack update. */
19254 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19256 if (return_in_pc)
19258 tmp = ret_rtx;
19259 XVECEXP (par, 0, 0) = tmp;
19262 if (emit_update)
19264 /* Increment the stack pointer, based on there being
19265 num_regs 4-byte registers to restore. */
19266 tmp = gen_rtx_SET (VOIDmode,
19267 stack_pointer_rtx,
19268 plus_constant (Pmode,
19269 stack_pointer_rtx,
19270 4 * num_regs));
19271 RTX_FRAME_RELATED_P (tmp) = 1;
19272 XVECEXP (par, 0, offset_adj) = tmp;
19275 /* Now restore every reg, which may include PC. */
19276 for (j = 0, i = 0; j < num_regs; i++)
19277 if (saved_regs_mask & (1 << i))
19279 reg = gen_rtx_REG (SImode, i);
19280 if ((num_regs == 1) && emit_update && !return_in_pc)
19282 /* Emit single load with writeback. */
19283 tmp = gen_frame_mem (SImode,
19284 gen_rtx_POST_INC (Pmode,
19285 stack_pointer_rtx));
19286 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19287 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19288 return;
19291 tmp = gen_rtx_SET (VOIDmode,
19292 reg,
19293 gen_frame_mem
19294 (SImode,
19295 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19296 RTX_FRAME_RELATED_P (tmp) = 1;
19297 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19299 /* We need to maintain a sequence for DWARF info too. Since the dwarf
19300 info should not include PC, skip it here. */
19301 if (i != PC_REGNUM)
19302 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19304 j++;
19307 if (return_in_pc)
19308 par = emit_jump_insn (par);
19309 else
19310 par = emit_insn (par);
19312 REG_NOTES (par) = dwarf;
19313 if (!return_in_pc)
19314 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19315 stack_pointer_rtx, stack_pointer_rtx);
19318 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19319 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19321 Unfortunately, since this insn does not reflect very well the actual
19322 semantics of the operation, we need to annotate the insn for the benefit
19323 of DWARF2 frame unwind information. */
19324 static void
19325 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19327 int i, j;
19328 rtx par;
19329 rtx dwarf = NULL_RTX;
19330 rtx tmp, reg;
19332 gcc_assert (num_regs && num_regs <= 32);
19334 /* Workaround ARM10 VFPr1 bug. */
19335 if (num_regs == 2 && !arm_arch6)
19337 if (first_reg == 15)
19338 first_reg--;
19340 num_regs++;
19343 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19344 there could be up to 32 D-registers to restore.
19345 If there are more than 16 D-registers, make two recursive calls,
19346 each of which emits one pop_multi instruction. */
19347 if (num_regs > 16)
19349 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19350 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19351 return;
19354 /* The parallel needs to hold num_regs SETs
19355 and one SET for the stack update. */
19356 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19358 /* Increment the stack pointer, based on there being
19359 num_regs 8-byte registers to restore. */
19360 tmp = gen_rtx_SET (VOIDmode,
19361 base_reg,
19362 plus_constant (Pmode, base_reg, 8 * num_regs));
19363 RTX_FRAME_RELATED_P (tmp) = 1;
19364 XVECEXP (par, 0, 0) = tmp;
19366 /* Now show every reg that will be restored, using a SET for each. */
19367 for (j = 0, i=first_reg; j < num_regs; i += 2)
19369 reg = gen_rtx_REG (DFmode, i);
19371 tmp = gen_rtx_SET (VOIDmode,
19372 reg,
19373 gen_frame_mem
19374 (DFmode,
19375 plus_constant (Pmode, base_reg, 8 * j)));
19376 RTX_FRAME_RELATED_P (tmp) = 1;
19377 XVECEXP (par, 0, j + 1) = tmp;
19379 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19381 j++;
19384 par = emit_insn (par);
19385 REG_NOTES (par) = dwarf;
19387 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19388 base_reg, base_reg);
19391 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
19392 an even number of registers is being popped, multiple LDRD patterns are
19393 created for all register pairs. If an odd number of registers is popped,
19394 the last register is loaded using an LDR pattern. */
19395 static void
19396 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19398 int num_regs = 0;
19399 int i, j;
19400 rtx par = NULL_RTX;
19401 rtx dwarf = NULL_RTX;
19402 rtx tmp, reg, tmp1;
19403 bool return_in_pc;
19405 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19406 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19407 if (saved_regs_mask & (1 << i))
19408 num_regs++;
19410 gcc_assert (num_regs && num_regs <= 16);
19412 /* We cannot generate ldrd for PC, so reduce the count if PC is to be
19413 popped. If num_regs was even it now becomes odd and we can generate
19414 a pop with PC; if num_regs was odd it becomes even and an ldr with
19415 return can be generated for PC. */
19416 if (return_in_pc)
19417 num_regs--;
19419 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19421 /* Var j iterates over all the registers in saved_regs_mask, while var i
19422 gives the index of each saved register in the stack frame.
19423 A PARALLEL RTX for a register pair is created here, so that the pattern
19424 for LDRD can be matched. As PC is always the last register to be popped,
19425 and we have already decremented num_regs if PC is present, we don't have
19426 to worry about PC in this loop. */
19427 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19428 if (saved_regs_mask & (1 << j))
19430 /* Create RTX for memory load. */
19431 reg = gen_rtx_REG (SImode, j);
19432 tmp = gen_rtx_SET (SImode,
19433 reg,
19434 gen_frame_mem (SImode,
19435 plus_constant (Pmode,
19436 stack_pointer_rtx, 4 * i)));
19437 RTX_FRAME_RELATED_P (tmp) = 1;
19439 if (i % 2 == 0)
19441 /* When saved-register index (i) is even, the RTX to be emitted is
19442 yet to be created. Hence create it first. The LDRD pattern we
19443 are generating is :
19444 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19445 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19446 where target registers need not be consecutive. */
19447 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19448 dwarf = NULL_RTX;
19451 /* The i-th register is added to the PARALLEL RTX: if i is even, reg_i
19452 is added as the 0th element; if i is odd, it is added as the 1st
19453 element of the LDRD pattern shown above. */
19454 XVECEXP (par, 0, (i % 2)) = tmp;
19455 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19457 if ((i % 2) == 1)
19459 /* When the saved-register index (i) is odd, RTXs for both registers
19460 to be loaded have been generated in the LDRD pattern given above,
19461 so the pattern can be emitted now. */
19462 par = emit_insn (par);
19463 REG_NOTES (par) = dwarf;
19464 RTX_FRAME_RELATED_P (par) = 1;
19467 i++;
19470 /* If the number of registers pushed is odd and return_in_pc is false, or
19471 the number of registers is even and return_in_pc is true, the last
19472 register is popped using LDR (and it can be PC). Hence, adjust the stack
19473 first and then use LDR with post increment. */
19475 /* Increment the stack pointer, based on there being
19476 num_regs 4-byte registers to restore. */
19477 tmp = gen_rtx_SET (VOIDmode,
19478 stack_pointer_rtx,
19479 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19480 RTX_FRAME_RELATED_P (tmp) = 1;
19481 tmp = emit_insn (tmp);
19482 if (!return_in_pc)
19484 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19485 stack_pointer_rtx, stack_pointer_rtx);
19488 dwarf = NULL_RTX;
19490 if (((num_regs % 2) == 1 && !return_in_pc)
19491 || ((num_regs % 2) == 0 && return_in_pc))
19493 /* Scan for the single register to be popped. Skip until the saved
19494 register is found. */
19495 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19497 /* Gen LDR with post increment here. */
19498 tmp1 = gen_rtx_MEM (SImode,
19499 gen_rtx_POST_INC (SImode,
19500 stack_pointer_rtx));
19501 set_mem_alias_set (tmp1, get_frame_alias_set ());
19503 reg = gen_rtx_REG (SImode, j);
19504 tmp = gen_rtx_SET (SImode, reg, tmp1);
19505 RTX_FRAME_RELATED_P (tmp) = 1;
19506 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19508 if (return_in_pc)
19510 /* If return_in_pc, j must be PC_REGNUM. */
19511 gcc_assert (j == PC_REGNUM);
19512 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19513 XVECEXP (par, 0, 0) = ret_rtx;
19514 XVECEXP (par, 0, 1) = tmp;
19515 par = emit_jump_insn (par);
19517 else
19519 par = emit_insn (tmp);
19520 REG_NOTES (par) = dwarf;
19521 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19522 stack_pointer_rtx, stack_pointer_rtx);
19526 else if ((num_regs % 2) == 1 && return_in_pc)
19528 /* There are 2 registers to be popped. So, generate the pattern
19529 pop_multiple_with_stack_update_and_return to pop in PC. */
19530 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
19533 return;
19536 /* LDRD in ARM mode needs consecutive registers as operands. This function
19537 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19538 offset addressing and then generates one separate stack update. This provides
19539 more scheduling freedom, compared to writeback on every load. However,
19540 if the function returns using load into PC directly
19541 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19542 before the last load. TODO: Add a peephole optimization to recognize
19543 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19544 peephole optimization to merge the load at stack-offset zero
19545 with the stack update instruction using load with writeback
19546 in post-index addressing mode. */
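/* A hypothetical example of the kind of epilogue this function aims for
   (assuming r4-r7 and lr were saved and the return is not through PC):
       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       ldr   lr, [sp, #16]
       add   sp, sp, #20
   i.e. offset-addressed loads followed by one stack update.  */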
19547 static void
19548 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
19550 int j = 0;
19551 int offset = 0;
19552 rtx par = NULL_RTX;
19553 rtx dwarf = NULL_RTX;
19554 rtx tmp, mem;
19556 /* Restore saved registers. */
19557 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
19558 j = 0;
19559 while (j <= LAST_ARM_REGNUM)
19560 if (saved_regs_mask & (1 << j))
19562 if ((j % 2) == 0
19563 && (saved_regs_mask & (1 << (j + 1)))
19564 && (j + 1) != PC_REGNUM)
19566 /* The current register and the next register form a register pair for which
19567 LDRD can be generated. PC is always the last register popped, and
19568 we handle it separately. */
19569 if (offset > 0)
19570 mem = gen_frame_mem (DImode,
19571 plus_constant (Pmode,
19572 stack_pointer_rtx,
19573 offset));
19574 else
19575 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19577 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
19578 tmp = emit_insn (tmp);
19579 RTX_FRAME_RELATED_P (tmp) = 1;
19581 /* Generate dwarf info. */
19583 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19584 gen_rtx_REG (SImode, j),
19585 NULL_RTX);
19586 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19587 gen_rtx_REG (SImode, j + 1),
19588 dwarf);
19590 REG_NOTES (tmp) = dwarf;
19592 offset += 8;
19593 j += 2;
19595 else if (j != PC_REGNUM)
19597 /* Emit a single word load. */
19598 if (offset > 0)
19599 mem = gen_frame_mem (SImode,
19600 plus_constant (Pmode,
19601 stack_pointer_rtx,
19602 offset));
19603 else
19604 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19606 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
19607 tmp = emit_insn (tmp);
19608 RTX_FRAME_RELATED_P (tmp) = 1;
19610 /* Generate dwarf info. */
19611 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
19612 gen_rtx_REG (SImode, j),
19613 NULL_RTX);
19615 offset += 4;
19616 j += 1;
19618 else /* j == PC_REGNUM */
19619 j++;
19621 else
19622 j++;
19624 /* Update the stack. */
19625 if (offset > 0)
19627 tmp = gen_rtx_SET (Pmode,
19628 stack_pointer_rtx,
19629 plus_constant (Pmode,
19630 stack_pointer_rtx,
19631 offset));
19632 tmp = emit_insn (tmp);
19633 arm_add_cfa_adjust_cfa_note (tmp, offset,
19634 stack_pointer_rtx, stack_pointer_rtx);
19635 offset = 0;
19638 if (saved_regs_mask & (1 << PC_REGNUM))
19640 /* Only PC is to be popped. */
19641 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19642 XVECEXP (par, 0, 0) = ret_rtx;
19643 tmp = gen_rtx_SET (SImode,
19644 gen_rtx_REG (SImode, PC_REGNUM),
19645 gen_frame_mem (SImode,
19646 gen_rtx_POST_INC (SImode,
19647 stack_pointer_rtx)));
19648 RTX_FRAME_RELATED_P (tmp) = 1;
19649 XVECEXP (par, 0, 1) = tmp;
19650 par = emit_jump_insn (par);
19652 /* Generate dwarf info. */
19653 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19654 gen_rtx_REG (SImode, PC_REGNUM),
19655 NULL_RTX);
19656 REG_NOTES (par) = dwarf;
19657 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19658 stack_pointer_rtx, stack_pointer_rtx);
19662 /* Calculate the size of the return value that is passed in registers. */
19663 static unsigned
19664 arm_size_return_regs (void)
19666 enum machine_mode mode;
19668 if (crtl->return_rtx != 0)
19669 mode = GET_MODE (crtl->return_rtx);
19670 else
19671 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19673 return GET_MODE_SIZE (mode);
19676 /* Return true if the current function needs to save/restore LR. */
19677 static bool
19678 thumb_force_lr_save (void)
19680 return !cfun->machine->lr_save_eliminated
19681 && (!leaf_function_p ()
19682 || thumb_far_jump_used_p ()
19683 || df_regs_ever_live_p (LR_REGNUM));
19686 /* Return true if CALL is an indirect tail call. In that case we cannot
19687 be sure that r3 will be available, because the call target itself is
19688 held in a register. */
19689 static bool
19690 is_indirect_tailcall_p (rtx call)
19692 rtx pat = PATTERN (call);
19694 /* Indirect tail call. */
19695 pat = XVECEXP (pat, 0, 0);
19696 if (GET_CODE (pat) == SET)
19697 pat = SET_SRC (pat);
19699 pat = XEXP (XEXP (pat, 0), 0);
19700 return REG_P (pat);
19703 /* Return true if r3 is used by any of the tail call insns in the
19704 current function. */
19705 static bool
19706 any_sibcall_could_use_r3 (void)
19708 edge_iterator ei;
19709 edge e;
19711 if (!crtl->tail_call_emit)
19712 return false;
19713 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19714 if (e->flags & EDGE_SIBCALL)
19716 rtx call = BB_END (e->src);
19717 if (!CALL_P (call))
19718 call = prev_nonnote_nondebug_insn (call);
19719 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
19720 if (find_regno_fusage (call, USE, 3)
19721 || is_indirect_tailcall_p (call))
19722 return true;
19724 return false;
19728 /* Compute the distance from register FROM to register TO.
19729 These can be the arg pointer (26), the soft frame pointer (25),
19730 the stack pointer (13) or the hard frame pointer (11).
19731 In thumb mode r7 is used as the soft frame pointer, if needed.
19732 Typical stack layout looks like this:
19734 old stack pointer -> | |
19735 ----
19736 | | \
19737 | | saved arguments for
19738 | | vararg functions
19739 | | /
19741 hard FP & arg pointer -> | | \
19742 | | stack
19743 | | frame
19744 | | /
19746 | | \
19747 | | call saved
19748 | | registers
19749 soft frame pointer -> | | /
19751 | | \
19752 | | local
19753 | | variables
19754 locals base pointer -> | | /
19756 | | \
19757 | | outgoing
19758 | | arguments
19759 current stack pointer -> | | /
19762 For a given function some or all of these stack components
19763 may not be needed, giving rise to the possibility of
19764 eliminating some of the registers.
19766 The values returned by this function must reflect the behavior
19767 of arm_expand_prologue() and arm_compute_save_reg_mask().
19769 The sign of the number returned reflects the direction of stack
19770 growth, so the values are positive for all eliminations except
19771 from the soft frame pointer to the hard frame pointer.
19773 SFP may point just inside the local variables block to ensure correct
19774 alignment. */
19777 /* Calculate stack offsets. These are used to calculate register elimination
19778 offsets and in prologue/epilogue code. Also calculates which registers
19779 should be saved. */
19781 static arm_stack_offsets *
19782 arm_get_frame_offsets (void)
19784 struct arm_stack_offsets *offsets;
19785 unsigned long func_type;
19786 int leaf;
19787 int saved;
19788 int core_saved;
19789 HOST_WIDE_INT frame_size;
19790 int i;
19792 offsets = &cfun->machine->stack_offsets;
19794 /* We need to know if we are a leaf function. Unfortunately, it
19795 is possible to be called after start_sequence has been called,
19796 which causes get_insns to return the insns for the sequence,
19797 not the function, which will cause leaf_function_p to return
19798 the incorrect result.
19800 However, we only need to know about leaf functions once reload has completed, and the
19801 frame size cannot be changed after that time, so we can safely
19802 use the cached value. */
19804 if (reload_completed)
19805 return offsets;
19807 /* Initially this is the size of the local variables. It will be translated
19808 into an offset once we have determined the size of preceding data. */
19809 frame_size = ROUND_UP_WORD (get_frame_size ());
19811 leaf = leaf_function_p ();
19813 /* Space for variadic functions. */
19814 offsets->saved_args = crtl->args.pretend_args_size;
19816 /* In Thumb mode this is incorrect, but never used. */
19817 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
19818 arm_compute_static_chain_stack_bytes();
19820 if (TARGET_32BIT)
19822 unsigned int regno;
19824 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
19825 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19826 saved = core_saved;
19828 /* We know that SP will be doubleword aligned on entry, and we must
19829 preserve that condition at any subroutine call. We also require the
19830 soft frame pointer to be doubleword aligned. */
19832 if (TARGET_REALLY_IWMMXT)
19834 /* Check for the call-saved iWMMXt registers. */
19835 for (regno = FIRST_IWMMXT_REGNUM;
19836 regno <= LAST_IWMMXT_REGNUM;
19837 regno++)
19838 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
19839 saved += 8;
19842 func_type = arm_current_func_type ();
19843 /* Space for saved VFP registers. */
19844 if (! IS_VOLATILE (func_type)
19845 && TARGET_HARD_FLOAT && TARGET_VFP)
19846 saved += arm_get_vfp_saved_size ();
19848 else /* TARGET_THUMB1 */
19850 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
19851 core_saved = bit_count (offsets->saved_regs_mask) * 4;
19852 saved = core_saved;
19853 if (TARGET_BACKTRACE)
19854 saved += 16;
19857 /* Saved registers include the stack frame. */
19858 offsets->saved_regs = offsets->saved_args + saved +
19859 arm_compute_static_chain_stack_bytes();
19860 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
19861 /* A leaf function does not need any stack alignment if it has nothing
19862 on the stack. */
19863 if (leaf && frame_size == 0
19864 /* However if it calls alloca(), we have a dynamically allocated
19865 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19866 && ! cfun->calls_alloca)
19868 offsets->outgoing_args = offsets->soft_frame;
19869 offsets->locals_base = offsets->soft_frame;
19870 return offsets;
19873 /* Ensure SFP has the correct alignment. */
19874 if (ARM_DOUBLEWORD_ALIGN
19875 && (offsets->soft_frame & 7))
19877 offsets->soft_frame += 4;
19878 /* Try to align stack by pushing an extra reg. Don't bother doing this
19879 when there is a stack frame as the alignment will be rolled into
19880 the normal stack adjustment. */
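/* Illustrative example (assumed values): with three call-saved core
   registers (12 bytes) and no locals, soft_frame falls 4 bytes short of
   8-byte alignment, so one extra register (r3 when safe, otherwise a free
   call-saved register) is added to saved_regs_mask purely as padding.  */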
19881 if (frame_size + crtl->outgoing_args_size == 0)
19883 int reg = -1;
19885 /* If it is safe to use r3, then do so. This sometimes
19886 generates better code on Thumb-2 by avoiding the need to
19887 use 32-bit push/pop instructions. */
19888 if (! any_sibcall_could_use_r3 ()
19889 && arm_size_return_regs () <= 12
19890 && (offsets->saved_regs_mask & (1 << 3)) == 0
19891 && (TARGET_THUMB2
19892 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
19894 reg = 3;
19896 else
19897 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
19899 /* Avoid fixed registers; they may be changed at
19900 arbitrary times so it's unsafe to restore them
19901 during the epilogue. */
19902 if (!fixed_regs[i]
19903 && (offsets->saved_regs_mask & (1 << i)) == 0)
19905 reg = i;
19906 break;
19910 if (reg != -1)
19912 offsets->saved_regs += 4;
19913 offsets->saved_regs_mask |= (1 << reg);
19918 offsets->locals_base = offsets->soft_frame + frame_size;
19919 offsets->outgoing_args = (offsets->locals_base
19920 + crtl->outgoing_args_size);
19922 if (ARM_DOUBLEWORD_ALIGN)
19924 /* Ensure SP remains doubleword aligned. */
19925 if (offsets->outgoing_args & 7)
19926 offsets->outgoing_args += 4;
19927 gcc_assert (!(offsets->outgoing_args & 7));
19930 return offsets;
19934 /* Calculate the relative offsets for the different stack pointers. Positive
19935 offsets are in the direction of stack growth. */
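/* A worked example with assumed offsets: if offsets->saved_args is 0 and
   offsets->outgoing_args is 32, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM below yields 32 - (0 + 4) = 28.  */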
19937 HOST_WIDE_INT
19938 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19940 arm_stack_offsets *offsets;
19942 offsets = arm_get_frame_offsets ();
19944 /* OK, now we have enough information to compute the distances.
19945 There must be an entry in these switch tables for each pair
19946 of registers in ELIMINABLE_REGS, even if some of the entries
19947 seem to be redundant or useless. */
19948 switch (from)
19950 case ARG_POINTER_REGNUM:
19951 switch (to)
19953 case THUMB_HARD_FRAME_POINTER_REGNUM:
19954 return 0;
19956 case FRAME_POINTER_REGNUM:
19957 /* This is the reverse of the soft frame pointer
19958 to hard frame pointer elimination below. */
19959 return offsets->soft_frame - offsets->saved_args;
19961 case ARM_HARD_FRAME_POINTER_REGNUM:
19962 /* This is only non-zero in the case where the static chain register
19963 is stored above the frame. */
19964 return offsets->frame - offsets->saved_args - 4;
19966 case STACK_POINTER_REGNUM:
19967 /* If nothing has been pushed on the stack at all
19968 then this will return -4. This *is* correct! */
19969 return offsets->outgoing_args - (offsets->saved_args + 4);
19971 default:
19972 gcc_unreachable ();
19974 gcc_unreachable ();
19976 case FRAME_POINTER_REGNUM:
19977 switch (to)
19979 case THUMB_HARD_FRAME_POINTER_REGNUM:
19980 return 0;
19982 case ARM_HARD_FRAME_POINTER_REGNUM:
19983 /* The hard frame pointer points to the top entry in the
19984 stack frame. The soft frame pointer to the bottom entry
19985 in the stack frame. If there is no stack frame at all,
19986 then they are identical. */
19988 return offsets->frame - offsets->soft_frame;
19990 case STACK_POINTER_REGNUM:
19991 return offsets->outgoing_args - offsets->soft_frame;
19993 default:
19994 gcc_unreachable ();
19996 gcc_unreachable ();
19998 default:
19999 /* You cannot eliminate from the stack pointer.
20000 In theory you could eliminate from the hard frame
20001 pointer to the stack pointer, but this will never
20002 happen, since if a stack frame is not needed the
20003 hard frame pointer will never be used. */
20004 gcc_unreachable ();
20008 /* Given FROM and TO register numbers, say whether this elimination is
20009 allowed. Frame pointer elimination is automatically handled.
20011 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20012 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20013 pointer, we must eliminate FRAME_POINTER_REGNUM into
20014 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20015 ARG_POINTER_REGNUM. */
20017 bool
20018 arm_can_eliminate (const int from, const int to)
20020 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20021 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20022 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20023 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20024 true);
20027 /* Emit RTL to save coprocessor registers on function entry. Returns the
20028 number of bytes pushed. */
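/* For instance (illustrative only), if d8-d10 are the only call-saved VFP
   registers that are live, the loop below coalesces them into a single
   vfp_emit_fstmd call covering d8-d10, i.e. one multi-register store
   rather than three separate ones.  */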
20030 static int
20031 arm_save_coproc_regs(void)
20033 int saved_size = 0;
20034 unsigned reg;
20035 unsigned start_reg;
20036 rtx insn;
20038 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20039 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20041 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20042 insn = gen_rtx_MEM (V2SImode, insn);
20043 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20044 RTX_FRAME_RELATED_P (insn) = 1;
20045 saved_size += 8;
20048 if (TARGET_HARD_FLOAT && TARGET_VFP)
20050 start_reg = FIRST_VFP_REGNUM;
20052 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20054 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20055 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20057 if (start_reg != reg)
20058 saved_size += vfp_emit_fstmd (start_reg,
20059 (reg - start_reg) / 2);
20060 start_reg = reg + 2;
20063 if (start_reg != reg)
20064 saved_size += vfp_emit_fstmd (start_reg,
20065 (reg - start_reg) / 2);
20067 return saved_size;
20071 /* Set the Thumb frame pointer from the stack pointer. */
20073 static void
20074 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20076 HOST_WIDE_INT amount;
20077 rtx insn, dwarf;
20079 amount = offsets->outgoing_args - offsets->locals_base;
20080 if (amount < 1024)
20081 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20082 stack_pointer_rtx, GEN_INT (amount)));
20083 else
20085 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20086 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20087 expects the first two operands to be the same. */
20088 if (TARGET_THUMB2)
20090 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20091 stack_pointer_rtx,
20092 hard_frame_pointer_rtx));
20094 else
20096 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20097 hard_frame_pointer_rtx,
20098 stack_pointer_rtx));
20100 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20101 plus_constant (Pmode, stack_pointer_rtx, amount));
20102 RTX_FRAME_RELATED_P (dwarf) = 1;
20103 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20106 RTX_FRAME_RELATED_P (insn) = 1;
20109 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20110 function. */
20111 void
20112 arm_expand_prologue (void)
20114 rtx amount;
20115 rtx insn;
20116 rtx ip_rtx;
20117 unsigned long live_regs_mask;
20118 unsigned long func_type;
20119 int fp_offset = 0;
20120 int saved_pretend_args = 0;
20121 int saved_regs = 0;
20122 unsigned HOST_WIDE_INT args_to_push;
20123 arm_stack_offsets *offsets;
20125 func_type = arm_current_func_type ();
20127 /* Naked functions don't have prologues. */
20128 if (IS_NAKED (func_type))
20129 return;
20131 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20132 args_to_push = crtl->args.pretend_args_size;
20134 /* Compute which registers we will have to save onto the stack. */
20135 offsets = arm_get_frame_offsets ();
20136 live_regs_mask = offsets->saved_regs_mask;
20138 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20140 if (IS_STACKALIGN (func_type))
20142 rtx r0, r1;
20144 /* Handle a word-aligned stack pointer. We generate the following:
20146 mov r0, sp
20147 bic r1, r0, #7
20148 mov sp, r1
20149 <save and restore r0 in normal prologue/epilogue>
20150 mov sp, r0
20151 bx lr
20153 The unwinder doesn't need to know about the stack realignment.
20154 Just tell it we saved SP in r0. */
20155 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20157 r0 = gen_rtx_REG (SImode, 0);
20158 r1 = gen_rtx_REG (SImode, 1);
20160 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20161 RTX_FRAME_RELATED_P (insn) = 1;
20162 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20164 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20166 /* ??? The CFA changes here, which may cause GDB to conclude that it
20167 has entered a different function. That said, the unwind info is
20168 correct, individually, before and after this instruction because
20169 we've described the save of SP, which will override the default
20170 handling of SP as restoring from the CFA. */
20171 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20174 /* For APCS frames, if the IP register is clobbered
20175 when creating the frame, save that register in a special
20176 way. */
20177 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20179 if (IS_INTERRUPT (func_type))
20181 /* Interrupt functions must not corrupt any registers.
20182 Creating a frame pointer however, corrupts the IP
20183 register, so we must push it first. */
20184 emit_multi_reg_push (1 << IP_REGNUM);
20186 /* Do not set RTX_FRAME_RELATED_P on this insn.
20187 The dwarf stack unwinding code only wants to see one
20188 stack decrement per function, and this is not it. If
20189 this instruction is labeled as being part of the frame
20190 creation sequence then dwarf2out_frame_debug_expr will
20191 die when it encounters the assignment of IP to FP
20192 later on, since the use of SP here establishes SP as
20193 the CFA register and not IP.
20195 Anyway this instruction is not really part of the stack
20196 frame creation although it is part of the prologue. */
20198 else if (IS_NESTED (func_type))
20200 /* The static chain register is the same as the IP register
20201 used as a scratch register during stack frame creation.
20202 To get around this need to find somewhere to store IP
20203 whilst the frame is being created. We try the following
20204 places in order:
20206 1. The last argument register r3.
20207 2. A slot on the stack above the frame. (This only
20208 works if the function is not a varargs function).
20209 3. Register r3 again, after pushing the argument registers
20210 onto the stack.
20212 Note - we only need to tell the dwarf2 backend about the SP
20213 adjustment in the second variant; the static chain register
20214 doesn't need to be unwound, as it doesn't contain a value
20215 inherited from the caller. */
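/* A sketch of variant 2 above (illustrative, not verbatim output): IP is
   spilled with
       str  ip, [sp, #-4]!
   and is recovered later in the prologue from the slot just above the
   frame, roughly  ldr  ip, [fp, #4].  */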
20217 if (!arm_r3_live_at_start_p ())
20218 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20219 else if (args_to_push == 0)
20221 rtx dwarf;
20223 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20224 saved_regs += 4;
20226 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
20227 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
20228 fp_offset = 4;
20230 /* Just tell the dwarf backend that we adjusted SP. */
20231 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20232 plus_constant (Pmode, stack_pointer_rtx,
20233 -fp_offset));
20234 RTX_FRAME_RELATED_P (insn) = 1;
20235 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20237 else
20239 /* Store the args on the stack. */
20240 if (cfun->machine->uses_anonymous_args)
20241 insn = emit_multi_reg_push
20242 ((0xf0 >> (args_to_push / 4)) & 0xf);
20243 else
20244 insn = emit_insn
20245 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20246 GEN_INT (- args_to_push)));
20248 RTX_FRAME_RELATED_P (insn) = 1;
20250 saved_pretend_args = 1;
20251 fp_offset = args_to_push;
20252 args_to_push = 0;
20254 /* Now reuse r3 to preserve IP. */
20255 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20259 insn = emit_set_insn (ip_rtx,
20260 plus_constant (Pmode, stack_pointer_rtx,
20261 fp_offset));
20262 RTX_FRAME_RELATED_P (insn) = 1;
20265 if (args_to_push)
20267 /* Push the argument registers, or reserve space for them. */
20268 if (cfun->machine->uses_anonymous_args)
20269 insn = emit_multi_reg_push
20270 ((0xf0 >> (args_to_push / 4)) & 0xf);
20271 else
20272 insn = emit_insn
20273 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20274 GEN_INT (- args_to_push)));
20275 RTX_FRAME_RELATED_P (insn) = 1;
20278 /* If this is an interrupt service routine, and the link register
20279 is going to be pushed, and we're not generating extra
20280 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
20281 subtracting four from LR now will mean that the function return
20282 can be done with a single instruction. */
20283 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20284 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20285 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20286 && TARGET_ARM)
20288 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20290 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20293 if (live_regs_mask)
20295 saved_regs += bit_count (live_regs_mask) * 4;
20296 if (optimize_size && !frame_pointer_needed
20297 && saved_regs == offsets->saved_regs - offsets->saved_args)
20299 /* If no coprocessor registers are being pushed and we don't have
20300 to worry about a frame pointer then push extra registers to
20301 create the stack frame. This is done in a way that does not
20302 alter the frame layout, so is independent of the epilogue. */
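/* Illustrative example (assumed values): if 8 more bytes of frame are
   needed and r0-r1 are not otherwise live, the mask is widened below to
   push r0 and r1 as padding, folding the frame allocation into the
   existing push instead of emitting a separate SP subtraction.  */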
20303 int n;
20304 int frame;
20305 n = 0;
20306 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20307 n++;
20308 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20309 if (frame && n * 4 >= frame)
20311 n = frame / 4;
20312 live_regs_mask |= (1 << n) - 1;
20313 saved_regs += frame;
20317 if (TARGET_LDRD
20318 && current_tune->prefer_ldrd_strd
20319 && !optimize_function_for_size_p (cfun))
20321 if (TARGET_THUMB2)
20323 thumb2_emit_strd_push (live_regs_mask);
20325 else if (TARGET_ARM
20326 && !TARGET_APCS_FRAME
20327 && !IS_INTERRUPT (func_type))
20329 arm_emit_strd_push (live_regs_mask);
20331 else
20333 insn = emit_multi_reg_push (live_regs_mask);
20334 RTX_FRAME_RELATED_P (insn) = 1;
20337 else
20339 insn = emit_multi_reg_push (live_regs_mask);
20340 RTX_FRAME_RELATED_P (insn) = 1;
20344 if (! IS_VOLATILE (func_type))
20345 saved_regs += arm_save_coproc_regs ();
20347 if (frame_pointer_needed && TARGET_ARM)
20349 /* Create the new frame pointer. */
20350 if (TARGET_APCS_FRAME)
20352 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20353 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20354 RTX_FRAME_RELATED_P (insn) = 1;
20356 if (IS_NESTED (func_type))
20358 /* Recover the static chain register. */
20359 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20360 insn = gen_rtx_REG (SImode, 3);
20361 else /* if (crtl->args.pretend_args_size == 0) */
20363 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20364 insn = gen_frame_mem (SImode, insn);
20366 emit_set_insn (ip_rtx, insn);
20367 /* Add a USE to stop propagate_one_insn() from barfing. */
20368 emit_insn (gen_force_register_use (ip_rtx));
20371 else
20373 insn = GEN_INT (saved_regs - 4);
20374 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20375 stack_pointer_rtx, insn));
20376 RTX_FRAME_RELATED_P (insn) = 1;
20380 if (flag_stack_usage_info)
20381 current_function_static_stack_size
20382 = offsets->outgoing_args - offsets->saved_args;
20384 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20386 /* This add can produce multiple insns for a large constant, so we
20387 need to get tricky. */
20388 rtx last = get_last_insn ();
20390 amount = GEN_INT (offsets->saved_args + saved_regs
20391 - offsets->outgoing_args);
20393 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20394 amount));
20397 last = last ? NEXT_INSN (last) : get_insns ();
20398 RTX_FRAME_RELATED_P (last) = 1;
20400 while (last != insn);
20402 /* If the frame pointer is needed, emit a special barrier that
20403 will prevent the scheduler from moving stores to the frame
20404 before the stack adjustment. */
20405 if (frame_pointer_needed)
20406 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20407 hard_frame_pointer_rtx));
20411 if (frame_pointer_needed && TARGET_THUMB2)
20412 thumb_set_frame_pointer (offsets);
20414 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20416 unsigned long mask;
20418 mask = live_regs_mask;
20419 mask &= THUMB2_WORK_REGS;
20420 if (!IS_NESTED (func_type))
20421 mask |= (1 << IP_REGNUM);
20422 arm_load_pic_register (mask);
20425 /* If we are profiling, make sure no instructions are scheduled before
20426 the call to mcount. Similarly if the user has requested no
20427 scheduling in the prolog. Similarly if we want non-call exceptions
20428 using the EABI unwinder, to prevent faulting instructions from being
20429 swapped with a stack adjustment. */
20430 if (crtl->profile || !TARGET_SCHED_PROLOG
20431 || (arm_except_unwind_info (&global_options) == UI_TARGET
20432 && cfun->can_throw_non_call_exceptions))
20433 emit_insn (gen_blockage ());
20435 /* If the link register is being kept alive, with the return address in it,
20436 then make sure that it does not get reused by the ce2 pass. */
20437 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20438 cfun->machine->lr_save_eliminated = 1;
20441 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20442 static void
20443 arm_print_condition (FILE *stream)
20445 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20447 /* Branch conversion is not implemented for Thumb-2. */
20448 if (TARGET_THUMB)
20450 output_operand_lossage ("predicated Thumb instruction");
20451 return;
20453 if (current_insn_predicate != NULL)
20455 output_operand_lossage
20456 ("predicated instruction in conditional sequence");
20457 return;
20460 fputs (arm_condition_codes[arm_current_cc], stream);
20462 else if (current_insn_predicate)
20464 enum arm_cond_code code;
20466 if (TARGET_THUMB1)
20468 output_operand_lossage ("predicated Thumb instruction");
20469 return;
20472 code = get_arm_condition_code (current_insn_predicate);
20473 fputs (arm_condition_codes[code], stream);
20478 /* If CODE is 'd', then the X is a condition operand and the instruction
20479 should only be executed if the condition is true.
20480 if CODE is 'D', then the X is a condition operand and the instruction
20481 should only be executed if the condition is false: however, if the mode
20482 of the comparison is CCFPEmode, then always execute the instruction -- we
20483 do this because in these circumstances !GE does not necessarily imply LT;
20484 in these cases the instruction pattern will take care to make sure that
20485 an instruction containing %d will follow, thereby undoing the effects of
20486 doing this instruction unconditionally.
20487 If CODE is 'N' then X is a floating point operand that must be negated
20488 before output.
20489 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20490 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20491 static void
20492 arm_print_operand (FILE *stream, rtx x, int code)
20494 switch (code)
20496 case '@':
20497 fputs (ASM_COMMENT_START, stream);
20498 return;
20500 case '_':
20501 fputs (user_label_prefix, stream);
20502 return;
20504 case '|':
20505 fputs (REGISTER_PREFIX, stream);
20506 return;
20508 case '?':
20509 arm_print_condition (stream);
20510 return;
20512 case '(':
20513 /* Nothing in unified syntax, otherwise the current condition code. */
20514 if (!TARGET_UNIFIED_ASM)
20515 arm_print_condition (stream);
20516 break;
20518 case ')':
20519 /* The current condition code in unified syntax, otherwise nothing. */
20520 if (TARGET_UNIFIED_ASM)
20521 arm_print_condition (stream);
20522 break;
20524 case '.':
20525 /* The current condition code for a condition code setting instruction.
20526 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20527 if (TARGET_UNIFIED_ASM)
20529 fputc('s', stream);
20530 arm_print_condition (stream);
20532 else
20534 arm_print_condition (stream);
20535 fputc('s', stream);
20537 return;
20539 case '!':
20540 /* If the instruction is conditionally executed then print
20541 the current condition code, otherwise print 's'. */
20542 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
20543 if (current_insn_predicate)
20544 arm_print_condition (stream);
20545 else
20546 fputc('s', stream);
20547 break;
20549 /* %# is a "break" sequence. It doesn't output anything, but is used to
20550 separate e.g. operand numbers from following text, if that text consists
20551 of further digits which we don't want to be part of the operand
20552 number. */
20553 case '#':
20554 return;
20556 case 'N':
20558 REAL_VALUE_TYPE r;
20559 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
20560 r = real_value_negate (&r);
20561 fprintf (stream, "%s", fp_const_from_val (&r));
20563 return;
20565 /* An integer or symbol address without a preceding # sign. */
20566 case 'c':
20567 switch (GET_CODE (x))
20569 case CONST_INT:
20570 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
20571 break;
20573 case SYMBOL_REF:
20574 output_addr_const (stream, x);
20575 break;
20577 case CONST:
20578 if (GET_CODE (XEXP (x, 0)) == PLUS
20579 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
20581 output_addr_const (stream, x);
20582 break;
20584 /* Fall through. */
20586 default:
20587 output_operand_lossage ("Unsupported operand for code '%c'", code);
20589 return;
20591 /* An integer that we want to print in HEX. */
20592 case 'x':
20593 switch (GET_CODE (x))
20595 case CONST_INT:
20596 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
20597 break;
20599 default:
20600 output_operand_lossage ("Unsupported operand for code '%c'", code);
20602 return;
20604 case 'B':
20605 if (CONST_INT_P (x))
20607 HOST_WIDE_INT val;
20608 val = ARM_SIGN_EXTEND (~INTVAL (x));
20609 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
20611 else
20613 putc ('~', stream);
20614 output_addr_const (stream, x);
20616 return;
20618 case 'L':
20619 /* The low 16 bits of an immediate constant. */
20620 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
20621 return;
20623 case 'i':
20624 fprintf (stream, "%s", arithmetic_instr (x, 1));
20625 return;
20627 case 'I':
20628 fprintf (stream, "%s", arithmetic_instr (x, 0));
20629 return;
20631 case 'S':
20633 HOST_WIDE_INT val;
20634 const char *shift;
20636 shift = shift_op (x, &val);
20638 if (shift)
20640 fprintf (stream, ", %s ", shift);
20641 if (val == -1)
20642 arm_print_operand (stream, XEXP (x, 1), 0);
20643 else
20644 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20647 return;
20649 /* An explanation of the 'Q', 'R' and 'H' register operands:
20651 In a pair of registers containing a DI or DF value the 'Q'
20652 operand returns the register number of the register containing
20653 the least significant part of the value. The 'R' operand returns
20654 the register number of the register containing the most
20655 significant part of the value.
20657 The 'H' operand returns the higher of the two register numbers.
20658 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20659 same as the 'Q' operand, since the most significant part of the
20660 value is held in the lower number register. The reverse is true
20661 on systems where WORDS_BIG_ENDIAN is false.
20663 The purpose of these operands is to distinguish between cases
20664 where the endian-ness of the values is important (for example
20665 when they are added together), and cases where the endian-ness
20666 is irrelevant, but the order of register operations is important.
20667 For example when loading a value from memory into a register
20668 pair, the endian-ness does not matter. Provided that the value
20669 from the lower memory address is put into the lower numbered
20670 register, and the value from the higher address is put into the
20671 higher numbered register, the load will work regardless of whether
20672 the value being loaded is big-wordian or little-wordian. The
20673 order of the two register loads can matter however, if the address
20674 of the memory location is actually held in one of the registers
20675 being overwritten by the load.
20677 The 'Q' and 'R' constraints are also available for 64-bit
20678 constants. */
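/* For instance (little-endian, i.e. WORDS_BIG_ENDIAN false, with an
   assumed register allocation): for a DImode value held in r0/r1,
   %Q prints r0 (least significant half), %R prints r1 (most significant
   half) and %H also prints r1, the higher-numbered register.  */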
20679 case 'Q':
20680 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20682 rtx part = gen_lowpart (SImode, x);
20683 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20684 return;
20687 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20689 output_operand_lossage ("invalid operand for code '%c'", code);
20690 return;
20693 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
20694 return;
20696 case 'R':
20697 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20699 enum machine_mode mode = GET_MODE (x);
20700 rtx part;
20702 if (mode == VOIDmode)
20703 mode = DImode;
20704 part = gen_highpart_mode (SImode, mode, x);
20705 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20706 return;
20709 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20711 output_operand_lossage ("invalid operand for code '%c'", code);
20712 return;
20715 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
20716 return;
20718 case 'H':
20719 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20721 output_operand_lossage ("invalid operand for code '%c'", code);
20722 return;
20725 asm_fprintf (stream, "%r", REGNO (x) + 1);
20726 return;
20728 case 'J':
20729 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20731 output_operand_lossage ("invalid operand for code '%c'", code);
20732 return;
20735 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
20736 return;
20738 case 'K':
20739 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20741 output_operand_lossage ("invalid operand for code '%c'", code);
20742 return;
20745 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
20746 return;
20748 case 'm':
20749 asm_fprintf (stream, "%r",
20750 REG_P (XEXP (x, 0))
20751 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
20752 return;
20754 case 'M':
20755 asm_fprintf (stream, "{%r-%r}",
20756 REGNO (x),
20757 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
20758 return;
20760 /* Like 'M', but writing doubleword vector registers, for use by Neon
20761 insns. */
20762 case 'h':
20764 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
20765 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
20766 if (numregs == 1)
20767 asm_fprintf (stream, "{d%d}", regno);
20768 else
20769 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
20771 return;
20773 case 'd':
20774 /* CONST_TRUE_RTX means always -- that's the default. */
20775 if (x == const_true_rtx)
20776 return;
20778 if (!COMPARISON_P (x))
20780 output_operand_lossage ("invalid operand for code '%c'", code);
20781 return;
20784 fputs (arm_condition_codes[get_arm_condition_code (x)],
20785 stream);
20786 return;
20788 case 'D':
20789 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
20790 want to do that. */
20791 if (x == const_true_rtx)
20793 output_operand_lossage ("instruction never executed");
20794 return;
20796 if (!COMPARISON_P (x))
20798 output_operand_lossage ("invalid operand for code '%c'", code);
20799 return;
20802 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
20803 (get_arm_condition_code (x))],
20804 stream);
20805 return;
20807 case 's':
20808 case 'V':
20809 case 'W':
20810 case 'X':
20811 case 'Y':
20812 case 'Z':
20813 /* Former Maverick support, removed after GCC-4.7. */
20814 output_operand_lossage ("obsolete Maverick format code '%c'", code);
20815 return;
20817 case 'U':
20818 if (!REG_P (x)
20819 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
20820 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
20821 /* Bad value for wCG register number. */
20823 output_operand_lossage ("invalid operand for code '%c'", code);
20824 return;
20827 else
20828 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
20829 return;
20831 /* Print an iWMMXt control register name. */
20832 case 'w':
20833 if (!CONST_INT_P (x)
20834 || INTVAL (x) < 0
20835 || INTVAL (x) >= 16)
20836 /* Bad value for wC register number. */
20838 output_operand_lossage ("invalid operand for code '%c'", code);
20839 return;
20842 else
20844 static const char * wc_reg_names [16] =
20846 "wCID", "wCon", "wCSSF", "wCASF",
20847 "wC4", "wC5", "wC6", "wC7",
20848 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
20849 "wC12", "wC13", "wC14", "wC15"
20852 fputs (wc_reg_names [INTVAL (x)], stream);
20854 return;
20856 /* Print the high single-precision register of a VFP double-precision
20857 register. */
20858 case 'p':
20860 int mode = GET_MODE (x);
20861 int regno;
20863 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
20865 output_operand_lossage ("invalid operand for code '%c'", code);
20866 return;
20869 regno = REGNO (x);
20870 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
20872 output_operand_lossage ("invalid operand for code '%c'", code);
20873 return;
20876 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
20878 return;
20880 /* Print a VFP/Neon double precision or quad precision register name. */
20881 case 'P':
20882 case 'q':
20884 int mode = GET_MODE (x);
20885 int is_quad = (code == 'q');
20886 int regno;
20888 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
20890 output_operand_lossage ("invalid operand for code '%c'", code);
20891 return;
20894 if (!REG_P (x)
20895 || !IS_VFP_REGNUM (REGNO (x)))
20897 output_operand_lossage ("invalid operand for code '%c'", code);
20898 return;
20901 regno = REGNO (x);
20902 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
20903 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
20905 output_operand_lossage ("invalid operand for code '%c'", code);
20906 return;
20909 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
20910 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
20912 return;
20914 /* These two codes print the low/high doubleword register of a Neon quad
20915 register, respectively. For pair-structure types, can also print
20916 low/high quadword registers. */
20917 case 'e':
20918 case 'f':
20920 int mode = GET_MODE (x);
20921 int regno;
20923 if ((GET_MODE_SIZE (mode) != 16
20924 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
20926 output_operand_lossage ("invalid operand for code '%c'", code);
20927 return;
20930 regno = REGNO (x);
20931 if (!NEON_REGNO_OK_FOR_QUAD (regno))
20933 output_operand_lossage ("invalid operand for code '%c'", code);
20934 return;
20937 if (GET_MODE_SIZE (mode) == 16)
20938 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
20939 + (code == 'f' ? 1 : 0));
20940 else
20941 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
20942 + (code == 'f' ? 1 : 0));
20944 return;
20946 /* Print a VFPv3 floating-point constant, represented as an integer
20947 index. */
20948 case 'G':
20950 int index = vfp3_const_double_index (x);
20951 gcc_assert (index != -1);
20952 fprintf (stream, "%d", index);
20954 return;
20956 /* Print bits representing opcode features for Neon.
20958 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
20959 and polynomials as unsigned.
20961 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
20963 Bit 2 is 1 for rounding functions, 0 otherwise. */
20965 /* Identify the type as 's', 'u', 'p' or 'f'. */
20966 case 'T':
20968 HOST_WIDE_INT bits = INTVAL (x);
20969 fputc ("uspf"[bits & 3], stream);
20971 return;
20973 /* Likewise, but signed and unsigned integers are both 'i'. */
20974 case 'F':
20976 HOST_WIDE_INT bits = INTVAL (x);
20977 fputc ("iipf"[bits & 3], stream);
20979 return;
20981 /* As for 'T', but emit 'u' instead of 'p'. */
20982 case 't':
20984 HOST_WIDE_INT bits = INTVAL (x);
20985 fputc ("usuf"[bits & 3], stream);
20987 return;
20989 /* Bit 2: rounding (vs none). */
20990 case 'O':
20992 HOST_WIDE_INT bits = INTVAL (x);
20993 fputs ((bits & 4) != 0 ? "r" : "", stream);
20995 return;
20997 /* Memory operand for vld1/vst1 instruction. */
20998 case 'A':
21000 rtx addr;
21001 bool postinc = FALSE;
21002 unsigned align, memsize, align_bits;
21004 gcc_assert (MEM_P (x));
21005 addr = XEXP (x, 0);
21006 if (GET_CODE (addr) == POST_INC)
21008 postinc = 1;
21009 addr = XEXP (addr, 0);
21011 asm_fprintf (stream, "[%r", REGNO (addr));
21013 /* We know the alignment of this access, so we can emit a hint in the
21014 instruction (for some alignments) as an aid to the memory subsystem
21015 of the target. */
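/* For example (illustrative): a 16-byte access known to be 16-byte
   aligned is printed as "[r0:128]", an 8-byte-aligned access of at least
   8 bytes gets ":64", and an access with unknown alignment gets no hint.  */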
21016 align = MEM_ALIGN (x) >> 3;
21017 memsize = MEM_SIZE (x);
21019 /* Only certain alignment specifiers are supported by the hardware. */
21020 if (memsize == 32 && (align % 32) == 0)
21021 align_bits = 256;
21022 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21023 align_bits = 128;
21024 else if (memsize >= 8 && (align % 8) == 0)
21025 align_bits = 64;
21026 else
21027 align_bits = 0;
21029 if (align_bits != 0)
21030 asm_fprintf (stream, ":%d", align_bits);
21032 asm_fprintf (stream, "]");
21034 if (postinc)
21035 fputs("!", stream);
21037 return;
21039 case 'C':
21041 rtx addr;
21043 gcc_assert (MEM_P (x));
21044 addr = XEXP (x, 0);
21045 gcc_assert (REG_P (addr));
21046 asm_fprintf (stream, "[%r]", REGNO (addr));
21048 return;
21050 /* Translate an S register number into a D register number and element index. */
21051 case 'y':
21053 int mode = GET_MODE (x);
21054 int regno;
21056 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21058 output_operand_lossage ("invalid operand for code '%c'", code);
21059 return;
21062 regno = REGNO (x);
21063 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21065 output_operand_lossage ("invalid operand for code '%c'", code);
21066 return;
21069 regno = regno - FIRST_VFP_REGNUM;
21070 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21072 return;
21074 case 'v':
21075 gcc_assert (CONST_DOUBLE_P (x));
21076 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
21077 return;
21079 /* Register specifier for vld1.16/vst1.16. Translate the S register
21080 number into a D register number and element index. */
21081 case 'z':
21083 int mode = GET_MODE (x);
21084 int regno;
21086 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21088 output_operand_lossage ("invalid operand for code '%c'", code);
21089 return;
21092 regno = REGNO (x);
21093 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21095 output_operand_lossage ("invalid operand for code '%c'", code);
21096 return;
21099 regno = regno - FIRST_VFP_REGNUM;
21100 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21102 return;
21104 default:
21105 if (x == 0)
21107 output_operand_lossage ("missing operand");
21108 return;
21111 switch (GET_CODE (x))
21113 case REG:
21114 asm_fprintf (stream, "%r", REGNO (x));
21115 break;
21117 case MEM:
21118 output_memory_reference_mode = GET_MODE (x);
21119 output_address (XEXP (x, 0));
21120 break;
21122 case CONST_DOUBLE:
21123 if (TARGET_NEON)
21125 char fpstr[20];
21126 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21127 sizeof (fpstr), 0, 1);
21128 fprintf (stream, "#%s", fpstr);
21130 else
21131 fprintf (stream, "#%s", fp_immediate_constant (x));
21132 break;
21134 default:
21135 gcc_assert (GET_CODE (x) != NEG);
21136 fputc ('#', stream);
21137 if (GET_CODE (x) == HIGH)
21139 fputs (":lower16:", stream);
21140 x = XEXP (x, 0);
21143 output_addr_const (stream, x);
21144 break;
21149 /* Target hook for printing a memory address. */
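/* Examples of the 32-bit forms handled below (assumed operands, output
   shown roughly):
       (reg r0)                        ->  [r0]
       (plus (reg r0) (const_int 4))   ->  [r0, #4]
       (plus (reg r0) (reg r1))        ->  [r0, r1]
       (post_inc (reg r0))             ->  [r0], #<access size>
   Pre/post-modify and shifted-index forms are handled further down.  */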
21150 static void
21151 arm_print_operand_address (FILE *stream, rtx x)
21153 if (TARGET_32BIT)
21155 int is_minus = GET_CODE (x) == MINUS;
21157 if (REG_P (x))
21158 asm_fprintf (stream, "[%r]", REGNO (x));
21159 else if (GET_CODE (x) == PLUS || is_minus)
21161 rtx base = XEXP (x, 0);
21162 rtx index = XEXP (x, 1);
21163 HOST_WIDE_INT offset = 0;
21164 if (!REG_P (base)
21165 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21167 /* Ensure that BASE is a register
21168 (one of them must be).
21169 Also ensure that SP is not used as an index register. */
21170 rtx temp = base;
21171 base = index;
21172 index = temp;
21174 switch (GET_CODE (index))
21176 case CONST_INT:
21177 offset = INTVAL (index);
21178 if (is_minus)
21179 offset = -offset;
21180 asm_fprintf (stream, "[%r, #%wd]",
21181 REGNO (base), offset);
21182 break;
21184 case REG:
21185 asm_fprintf (stream, "[%r, %s%r]",
21186 REGNO (base), is_minus ? "-" : "",
21187 REGNO (index));
21188 break;
21190 case MULT:
21191 case ASHIFTRT:
21192 case LSHIFTRT:
21193 case ASHIFT:
21194 case ROTATERT:
21196 asm_fprintf (stream, "[%r, %s%r",
21197 REGNO (base), is_minus ? "-" : "",
21198 REGNO (XEXP (index, 0)));
21199 arm_print_operand (stream, index, 'S');
21200 fputs ("]", stream);
21201 break;
21204 default:
21205 gcc_unreachable ();
21208 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21209 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21211 extern enum machine_mode output_memory_reference_mode;
21213 gcc_assert (REG_P (XEXP (x, 0)));
21215 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21216 asm_fprintf (stream, "[%r, #%s%d]!",
21217 REGNO (XEXP (x, 0)),
21218 GET_CODE (x) == PRE_DEC ? "-" : "",
21219 GET_MODE_SIZE (output_memory_reference_mode));
21220 else
21221 asm_fprintf (stream, "[%r], #%s%d",
21222 REGNO (XEXP (x, 0)),
21223 GET_CODE (x) == POST_DEC ? "-" : "",
21224 GET_MODE_SIZE (output_memory_reference_mode));
21226 else if (GET_CODE (x) == PRE_MODIFY)
21228 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21229 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21230 asm_fprintf (stream, "#%wd]!",
21231 INTVAL (XEXP (XEXP (x, 1), 1)));
21232 else
21233 asm_fprintf (stream, "%r]!",
21234 REGNO (XEXP (XEXP (x, 1), 1)));
21236 else if (GET_CODE (x) == POST_MODIFY)
21238 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21239 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21240 asm_fprintf (stream, "#%wd",
21241 INTVAL (XEXP (XEXP (x, 1), 1)));
21242 else
21243 asm_fprintf (stream, "%r",
21244 REGNO (XEXP (XEXP (x, 1), 1)));
21246 else output_addr_const (stream, x);
21248 else
21250 if (REG_P (x))
21251 asm_fprintf (stream, "[%r]", REGNO (x));
21252 else if (GET_CODE (x) == POST_INC)
21253 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21254 else if (GET_CODE (x) == PLUS)
21256 gcc_assert (REG_P (XEXP (x, 0)));
21257 if (CONST_INT_P (XEXP (x, 1)))
21258 asm_fprintf (stream, "[%r, #%wd]",
21259 REGNO (XEXP (x, 0)),
21260 INTVAL (XEXP (x, 1)));
21261 else
21262 asm_fprintf (stream, "[%r, %r]",
21263 REGNO (XEXP (x, 0)),
21264 REGNO (XEXP (x, 1)));
21266 else
21267 output_addr_const (stream, x);
21271 /* Target hook for indicating whether a punctuation character for
21272 TARGET_PRINT_OPERAND is valid. */
21273 static bool
21274 arm_print_operand_punct_valid_p (unsigned char code)
21276 return (code == '@' || code == '|' || code == '.'
21277 || code == '(' || code == ')' || code == '#'
21278 || (TARGET_32BIT && (code == '?'))
21279 || (TARGET_THUMB2 && (code == '!'))
21280 || (TARGET_THUMB && (code == '_')));
21283 /* Target hook for assembling integer objects. The ARM version needs to
21284 handle word-sized values specially. */
21285 static bool
21286 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21288 enum machine_mode mode;
21290 if (size == UNITS_PER_WORD && aligned_p)
21292 fputs ("\t.word\t", asm_out_file);
21293 output_addr_const (asm_out_file, x);
21295 /* Mark symbols as position independent. We only do this in the
21296 .text segment, not in the .data segment. */
21297 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21298 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21300 /* See legitimize_pic_address for an explanation of the
21301 TARGET_VXWORKS_RTP check. */
21302 if (TARGET_VXWORKS_RTP
21303 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21304 fputs ("(GOT)", asm_out_file);
21305 else
21306 fputs ("(GOTOFF)", asm_out_file);
21308 fputc ('\n', asm_out_file);
21309 return true;
21312 mode = GET_MODE (x);
21314 if (arm_vector_mode_supported_p (mode))
21316 int i, units;
21318 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21320 units = CONST_VECTOR_NUNITS (x);
21321 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21323 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21324 for (i = 0; i < units; i++)
21326 rtx elt = CONST_VECTOR_ELT (x, i);
21327 assemble_integer
21328 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21330 else
21331 for (i = 0; i < units; i++)
21333 rtx elt = CONST_VECTOR_ELT (x, i);
21334 REAL_VALUE_TYPE rval;
21336 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21338 assemble_real
21339 (rval, GET_MODE_INNER (mode),
21340 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21343 return true;
21346 return default_assemble_integer (x, size, aligned_p);
21349 static void
21350 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21352 section *s;
21354 if (!TARGET_AAPCS_BASED)
21356 (is_ctor ?
21357 default_named_section_asm_out_constructor
21358 : default_named_section_asm_out_destructor) (symbol, priority);
21359 return;
21362 /* Put these in the .init_array section, using a special relocation. */
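/* For example, a constructor with (non-default) priority 123 would be
   placed in a section named ".init_array.00123", matching the "%s.%.5u"
   format used below.  */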
21363 if (priority != DEFAULT_INIT_PRIORITY)
21365 char buf[18];
21366 sprintf (buf, "%s.%.5u",
21367 is_ctor ? ".init_array" : ".fini_array",
21368 priority);
21369 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21371 else if (is_ctor)
21372 s = ctors_section;
21373 else
21374 s = dtors_section;
21376 switch_to_section (s);
21377 assemble_align (POINTER_SIZE);
21378 fputs ("\t.word\t", asm_out_file);
21379 output_addr_const (asm_out_file, symbol);
21380 fputs ("(target1)\n", asm_out_file);
21383 /* Add a function to the list of static constructors. */
21385 static void
21386 arm_elf_asm_constructor (rtx symbol, int priority)
21388 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21391 /* Add a function to the list of static destructors. */
21393 static void
21394 arm_elf_asm_destructor (rtx symbol, int priority)
21396 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21399 /* A finite state machine takes care of noticing whether or not instructions
21400 can be conditionally executed, and thus decrease execution time and code
21401 size by deleting branch instructions. The fsm is controlled by
21402 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21404 /* The states of the fsm controlling condition codes are:
21405 0: normal, do nothing special
21406 1: make ASM_OUTPUT_OPCODE not output this instruction
21407 2: make ASM_OUTPUT_OPCODE not output this instruction
21408 3: make instructions conditional
21409 4: make instructions conditional
21411 State transitions (state->state by whom under condition):
21412 0 -> 1 final_prescan_insn if the `target' is a label
21413 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21414 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21415 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21416 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21417 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21418 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21419 (the target insn is arm_target_insn).
21421 If the jump clobbers the conditions then we use states 2 and 4.
21423 A similar thing can be done with conditional return insns.
21425 XXX In case the `target' is an unconditional branch, this conditionalising
21426 of the instructions always reduces code size, but not always execution
21427 time. But then, I want to reduce the code size to somewhere near what
21428 /bin/cc produces. */
21430 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21431 instructions. When a COND_EXEC instruction is seen the subsequent
21432 instructions are scanned so that multiple conditional instructions can be
21433 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21434 specify the length and true/false mask for the IT block. These will be
21435 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
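/* As a hedged illustration: three consecutive COND_EXEC insns predicated
   EQ, EQ and NE could be combined into one block and emitted behind a
   single "itte eq", with arm_condexec_mask recording the true/false
   pattern.  */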
21437 /* Returns the index of the ARM condition code string in
21438 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21439 COMPARISON should be an rtx like `(eq (...) (...))'. */
21441 enum arm_cond_code
21442 maybe_get_arm_condition_code (rtx comparison)
21444 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21445 enum arm_cond_code code;
21446 enum rtx_code comp_code = GET_CODE (comparison);
21448 if (GET_MODE_CLASS (mode) != MODE_CC)
21449 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21450 XEXP (comparison, 1));
21452 switch (mode)
21454 case CC_DNEmode: code = ARM_NE; goto dominance;
21455 case CC_DEQmode: code = ARM_EQ; goto dominance;
21456 case CC_DGEmode: code = ARM_GE; goto dominance;
21457 case CC_DGTmode: code = ARM_GT; goto dominance;
21458 case CC_DLEmode: code = ARM_LE; goto dominance;
21459 case CC_DLTmode: code = ARM_LT; goto dominance;
21460 case CC_DGEUmode: code = ARM_CS; goto dominance;
21461 case CC_DGTUmode: code = ARM_HI; goto dominance;
21462 case CC_DLEUmode: code = ARM_LS; goto dominance;
21463 case CC_DLTUmode: code = ARM_CC;
21465 dominance:
21466 if (comp_code == EQ)
21467 return ARM_INVERSE_CONDITION_CODE (code);
21468 if (comp_code == NE)
21469 return code;
21470 return ARM_NV;
21472 case CC_NOOVmode:
21473 switch (comp_code)
21475 case NE: return ARM_NE;
21476 case EQ: return ARM_EQ;
21477 case GE: return ARM_PL;
21478 case LT: return ARM_MI;
21479 default: return ARM_NV;
21482 case CC_Zmode:
21483 switch (comp_code)
21485 case NE: return ARM_NE;
21486 case EQ: return ARM_EQ;
21487 default: return ARM_NV;
21490 case CC_Nmode:
21491 switch (comp_code)
21493 case NE: return ARM_MI;
21494 case EQ: return ARM_PL;
21495 default: return ARM_NV;
21498 case CCFPEmode:
21499 case CCFPmode:
21500 /* We can handle all cases except UNEQ and LTGT. */
21501 switch (comp_code)
21503 case GE: return ARM_GE;
21504 case GT: return ARM_GT;
21505 case LE: return ARM_LS;
21506 case LT: return ARM_MI;
21507 case NE: return ARM_NE;
21508 case EQ: return ARM_EQ;
21509 case ORDERED: return ARM_VC;
21510 case UNORDERED: return ARM_VS;
21511 case UNLT: return ARM_LT;
21512 case UNLE: return ARM_LE;
21513 case UNGT: return ARM_HI;
21514 case UNGE: return ARM_PL;
21515 /* UNEQ and LTGT do not have a representation. */
21516 case UNEQ: /* Fall through. */
21517 case LTGT: /* Fall through. */
21518 default: return ARM_NV;
21521 case CC_SWPmode:
21522 switch (comp_code)
21524 case NE: return ARM_NE;
21525 case EQ: return ARM_EQ;
21526 case GE: return ARM_LE;
21527 case GT: return ARM_LT;
21528 case LE: return ARM_GE;
21529 case LT: return ARM_GT;
21530 case GEU: return ARM_LS;
21531 case GTU: return ARM_CC;
21532 case LEU: return ARM_CS;
21533 case LTU: return ARM_HI;
21534 default: return ARM_NV;
21537 case CC_Cmode:
21538 switch (comp_code)
21540 case LTU: return ARM_CS;
21541 case GEU: return ARM_CC;
21542 default: return ARM_NV;
21545 case CC_CZmode:
21546 switch (comp_code)
21548 case NE: return ARM_NE;
21549 case EQ: return ARM_EQ;
21550 case GEU: return ARM_CS;
21551 case GTU: return ARM_HI;
21552 case LEU: return ARM_LS;
21553 case LTU: return ARM_CC;
21554 default: return ARM_NV;
21557 case CC_NCVmode:
21558 switch (comp_code)
21560 case GE: return ARM_GE;
21561 case LT: return ARM_LT;
21562 case GEU: return ARM_CS;
21563 case LTU: return ARM_CC;
21564 default: return ARM_NV;
21567 case CCmode:
21568 switch (comp_code)
21570 case NE: return ARM_NE;
21571 case EQ: return ARM_EQ;
21572 case GE: return ARM_GE;
21573 case GT: return ARM_GT;
21574 case LE: return ARM_LE;
21575 case LT: return ARM_LT;
21576 case GEU: return ARM_CS;
21577 case GTU: return ARM_HI;
21578 case LEU: return ARM_LS;
21579 case LTU: return ARM_CC;
21580 default: return ARM_NV;
21583 default: gcc_unreachable ();
21587 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21588 static enum arm_cond_code
21589 get_arm_condition_code (rtx comparison)
21591 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
21592 gcc_assert (code != ARM_NV);
21593 return code;
21596 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21597 instructions. */
21598 void
21599 thumb2_final_prescan_insn (rtx insn)
21601 rtx first_insn = insn;
21602 rtx body = PATTERN (insn);
21603 rtx predicate;
21604 enum arm_cond_code code;
21605 int n;
21606 int mask;
21607 int max;
21609 /* Maximum number of conditionally executed instructions in a block
21610 is the minimum of two limits: the maximum allowed in an IT block
21611 and the maximum that is beneficial according to the cost model and tune. */
21612 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
21613 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
21615 /* Remove the previous insn from the count of insns to be output. */
21616 if (arm_condexec_count)
21617 arm_condexec_count--;
21619 /* Nothing to do if we are already inside a conditional block. */
21620 if (arm_condexec_count)
21621 return;
21623 if (GET_CODE (body) != COND_EXEC)
21624 return;
21626 /* Conditional jumps are implemented directly. */
21627 if (JUMP_P (insn))
21628 return;
21630 predicate = COND_EXEC_TEST (body);
21631 arm_current_cc = get_arm_condition_code (predicate);
21633 n = get_attr_ce_count (insn);
21634 arm_condexec_count = 1;
21635 arm_condexec_mask = (1 << n) - 1;
21636 arm_condexec_masklen = n;
21637 /* See if subsequent instructions can be combined into the same block. */
21638 for (;;)
21640 insn = next_nonnote_insn (insn);
21642 /* Jumping into the middle of an IT block is illegal, so a label or
21643 barrier terminates the block. */
21644 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
21645 break;
21647 body = PATTERN (insn);
21648 /* USE and CLOBBER aren't really insns, so just skip them. */
21649 if (GET_CODE (body) == USE
21650 || GET_CODE (body) == CLOBBER)
21651 continue;
21653 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21654 if (GET_CODE (body) != COND_EXEC)
21655 break;
21656 /* Maximum number of conditionally executed instructions in a block. */
21657 n = get_attr_ce_count (insn);
21658 if (arm_condexec_masklen + n > max)
21659 break;
21661 predicate = COND_EXEC_TEST (body);
21662 code = get_arm_condition_code (predicate);
21663 mask = (1 << n) - 1;
21664 if (arm_current_cc == code)
21665 arm_condexec_mask |= (mask << arm_condexec_masklen);
21666 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
21667 break;
21669 arm_condexec_count++;
21670 arm_condexec_masklen += n;
21672 /* A jump must be the last instruction in a conditional block. */
21673 if (JUMP_P (insn))
21674 break;
21676 /* Restore recog_data (getting the attributes of other insns can
21677 destroy this array, but final.c assumes that it remains intact
21678 across this call). */
21679 extract_constrain_insn_cached (first_insn);
21682 void
21683 arm_final_prescan_insn (rtx insn)
21685 /* BODY will hold the body of INSN. */
21686 rtx body = PATTERN (insn);
21688 /* This will be 1 if trying to repeat the trick, and things need to be
21689 reversed if it appears to fail. */
21690 int reverse = 0;
21692 /* If we start with a return insn, we only succeed if we find another one. */
21693 int seeking_return = 0;
21694 enum rtx_code return_code = UNKNOWN;
21696 /* START_INSN will hold the insn from which we start looking. This is the
21697 first insn after the following code_label if REVERSE is true. */
21698 rtx start_insn = insn;
21700 /* If in state 4, check if the target branch is reached, in order to
21701 change back to state 0. */
21702 if (arm_ccfsm_state == 4)
21704 if (insn == arm_target_insn)
21706 arm_target_insn = NULL;
21707 arm_ccfsm_state = 0;
21709 return;
21712 /* If in state 3, it is possible to repeat the trick, if this insn is an
21713 unconditional branch to a label, and immediately following this branch
21714 is the previous target label which is only used once, and the label this
21715 branch jumps to is not too far off. */
21716 if (arm_ccfsm_state == 3)
21718 if (simplejump_p (insn))
21720 start_insn = next_nonnote_insn (start_insn);
21721 if (BARRIER_P (start_insn))
21723 /* XXX Isn't this always a barrier? */
21724 start_insn = next_nonnote_insn (start_insn);
21726 if (LABEL_P (start_insn)
21727 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
21728 && LABEL_NUSES (start_insn) == 1)
21729 reverse = TRUE;
21730 else
21731 return;
21733 else if (ANY_RETURN_P (body))
21735 start_insn = next_nonnote_insn (start_insn);
21736 if (BARRIER_P (start_insn))
21737 start_insn = next_nonnote_insn (start_insn);
21738 if (LABEL_P (start_insn)
21739 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
21740 && LABEL_NUSES (start_insn) == 1)
21742 reverse = TRUE;
21743 seeking_return = 1;
21744 return_code = GET_CODE (body);
21746 else
21747 return;
21749 else
21750 return;
21753 gcc_assert (!arm_ccfsm_state || reverse);
21754 if (!JUMP_P (insn))
21755 return;
21757 /* This jump might be paralleled with a clobber of the condition codes;
21758 the jump should always come first. */
21759 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
21760 body = XVECEXP (body, 0, 0);
21762 if (reverse
21763 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
21764 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
21766 int insns_skipped;
21767 int fail = FALSE, succeed = FALSE;
21768 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
21769 int then_not_else = TRUE;
21770 rtx this_insn = start_insn, label = 0;
21772 /* Register the insn jumped to. */
21773 if (reverse)
21775 if (!seeking_return)
21776 label = XEXP (SET_SRC (body), 0);
21778 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
21779 label = XEXP (XEXP (SET_SRC (body), 1), 0);
21780 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
21782 label = XEXP (XEXP (SET_SRC (body), 2), 0);
21783 then_not_else = FALSE;
21785 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
21787 seeking_return = 1;
21788 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
21790 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
21792 seeking_return = 1;
21793 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
21794 then_not_else = FALSE;
21796 else
21797 gcc_unreachable ();
21799 /* See how many insns this branch skips, and what kind of insns. If all
21800 insns are okay, and the label or unconditional branch to the same
21801 label is not too far away, succeed. */
21802 for (insns_skipped = 0;
21803 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
21805 rtx scanbody;
21807 this_insn = next_nonnote_insn (this_insn);
21808 if (!this_insn)
21809 break;
21811 switch (GET_CODE (this_insn))
21813 case CODE_LABEL:
21814 /* Succeed if it is the target label, otherwise fail since
21815 control falls in from somewhere else. */
21816 if (this_insn == label)
21818 arm_ccfsm_state = 1;
21819 succeed = TRUE;
21821 else
21822 fail = TRUE;
21823 break;
21825 case BARRIER:
21826 /* Succeed if the following insn is the target label.
21827 Otherwise fail.
21828 If return insns are used then the last insn in a function
21829 will be a barrier. */
21830 this_insn = next_nonnote_insn (this_insn);
21831 if (this_insn && this_insn == label)
21833 arm_ccfsm_state = 1;
21834 succeed = TRUE;
21836 else
21837 fail = TRUE;
21838 break;
21840 case CALL_INSN:
21841 /* The AAPCS says that conditional calls should not be
21842 used since they make interworking inefficient (the
21843 linker can't transform BL<cond> into BLX). That's
21844 only a problem if the machine has BLX. */
21845 if (arm_arch5)
21847 fail = TRUE;
21848 break;
21851 /* Succeed if the following insn is the target label, or
21852 if the following two insns are a barrier and the
21853 target label. */
21854 this_insn = next_nonnote_insn (this_insn);
21855 if (this_insn && BARRIER_P (this_insn))
21856 this_insn = next_nonnote_insn (this_insn);
21858 if (this_insn && this_insn == label
21859 && insns_skipped < max_insns_skipped)
21861 arm_ccfsm_state = 1;
21862 succeed = TRUE;
21864 else
21865 fail = TRUE;
21866 break;
21868 case JUMP_INSN:
21869 /* If this is an unconditional branch to the same label, succeed.
21870 If it is to another label, do nothing. If it is conditional,
21871 fail. */
21872 /* XXX Probably, the tests for SET and the PC are
21873 unnecessary. */
21875 scanbody = PATTERN (this_insn);
21876 if (GET_CODE (scanbody) == SET
21877 && GET_CODE (SET_DEST (scanbody)) == PC)
21879 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
21880 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
21882 arm_ccfsm_state = 2;
21883 succeed = TRUE;
21885 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
21886 fail = TRUE;
21888 /* Fail if a conditional return is undesirable (e.g. on a
21889 StrongARM), but still allow this if optimizing for size. */
21890 else if (GET_CODE (scanbody) == return_code
21891 && !use_return_insn (TRUE, NULL)
21892 && !optimize_size)
21893 fail = TRUE;
21894 else if (GET_CODE (scanbody) == return_code)
21896 arm_ccfsm_state = 2;
21897 succeed = TRUE;
21899 else if (GET_CODE (scanbody) == PARALLEL)
21901 switch (get_attr_conds (this_insn))
21903 case CONDS_NOCOND:
21904 break;
21905 default:
21906 fail = TRUE;
21907 break;
21910 else
21911 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
21913 break;
21915 case INSN:
21916 /* Instructions using or affecting the condition codes make it
21917 fail. */
21918 scanbody = PATTERN (this_insn);
21919 if (!(GET_CODE (scanbody) == SET
21920 || GET_CODE (scanbody) == PARALLEL)
21921 || get_attr_conds (this_insn) != CONDS_NOCOND)
21922 fail = TRUE;
21923 break;
21925 default:
21926 break;
21929 if (succeed)
21931 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
21932 arm_target_label = CODE_LABEL_NUMBER (label);
21933 else
21935 gcc_assert (seeking_return || arm_ccfsm_state == 2);
21937 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
21939 this_insn = next_nonnote_insn (this_insn);
21940 gcc_assert (!this_insn
21941 || (!BARRIER_P (this_insn)
21942 && !LABEL_P (this_insn)));
21944 if (!this_insn)
21946 /* Oh, dear! We ran off the end... give up. */
21947 extract_constrain_insn_cached (insn);
21948 arm_ccfsm_state = 0;
21949 arm_target_insn = NULL;
21950 return;
21952 arm_target_insn = this_insn;
21955 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
21956 what it was. */
21957 if (!reverse)
21958 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
21960 if (reverse || then_not_else)
21961 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
21964 /* Restore recog_data (getting the attributes of other insns can
21965 destroy this array, but final.c assumes that it remains intact
21966 across this call). */
21967 extract_constrain_insn_cached (insn);
21971 /* Output IT instructions. */
21972 void
21973 thumb2_asm_output_opcode (FILE * stream)
21975 char buff[5];
21976 int n;
21978 if (arm_condexec_mask)
21980 for (n = 0; n < arm_condexec_masklen; n++)
21981 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
21982 buff[n] = 0;
21983 asm_fprintf(stream, "i%s\t%s\n\t", buff,
21984 arm_condition_codes[arm_current_cc]);
21985 arm_condexec_mask = 0;
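/* Sketch of the emitted text: with arm_condexec_mask == 0x3,
   arm_condexec_masklen == 3 and arm_current_cc == ARM_GE, buff becomes
   "tte" and the call above prints "itte\tge" ahead of the first
   conditional instruction of the block.  */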
21989 /* Returns true if REGNO is a valid register
21990 for holding a quantity of type MODE. */
21991 int
21992 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
21994 if (GET_MODE_CLASS (mode) == MODE_CC)
21995 return (regno == CC_REGNUM
21996 || (TARGET_HARD_FLOAT && TARGET_VFP
21997 && regno == VFPCC_REGNUM));
21999 if (TARGET_THUMB1)
22000 /* For the Thumb we only allow values bigger than SImode in
22001 registers 0 - 6, so that there is always a second low
22002 register available to hold the upper part of the value.
22003 We probably ought to ensure that the register is the
22004 start of an even-numbered register pair. */
22005 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
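/* Illustration (assuming LAST_LO_REGNUM is 7, as defined in arm.h): under
   TARGET_THUMB1 a DImode value, which needs two registers, is accepted
   only when it starts in r0-r6, while single-register modes such as SImode
   are accepted at this point for any register number.  */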
22007 if (TARGET_HARD_FLOAT && TARGET_VFP
22008 && IS_VFP_REGNUM (regno))
22010 if (mode == SFmode || mode == SImode)
22011 return VFP_REGNO_OK_FOR_SINGLE (regno);
22013 if (mode == DFmode)
22014 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22016 /* VFP registers can hold HFmode values, but there is no point in
22017 putting them there unless we have hardware conversion insns. */
22018 if (mode == HFmode)
22019 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22021 if (TARGET_NEON)
22022 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22023 || (VALID_NEON_QREG_MODE (mode)
22024 && NEON_REGNO_OK_FOR_QUAD (regno))
22025 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22026 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22027 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22028 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22029 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22031 return FALSE;
22034 if (TARGET_REALLY_IWMMXT)
22036 if (IS_IWMMXT_GR_REGNUM (regno))
22037 return mode == SImode;
22039 if (IS_IWMMXT_REGNUM (regno))
22040 return VALID_IWMMXT_REG_MODE (mode);
22043 /* We allow almost any value to be stored in the general registers.
22044 Restrict doubleword quantities to even register pairs so that we can
22045 use ldrd. Do not allow very large Neon structure opaque modes in
22046 general registers; they would use too many. */
22047 if (regno <= LAST_ARM_REGNUM)
22048 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22049 && ARM_NUM_REGS (mode) <= 4;
22051 if (regno == FRAME_POINTER_REGNUM
22052 || regno == ARG_POINTER_REGNUM)
22053 /* We only allow integers in the fake hard registers. */
22054 return GET_MODE_CLASS (mode) == MODE_INT;
22056 return FALSE;
22059 /* Implement MODES_TIEABLE_P. */
22061 bool
22062 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22064 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22065 return true;
22067 /* We specifically want to allow elements of "structure" modes to
22068 be tieable to the structure. This more general condition allows
22069 other rarer situations too. */
22070 if (TARGET_NEON
22071 && (VALID_NEON_DREG_MODE (mode1)
22072 || VALID_NEON_QREG_MODE (mode1)
22073 || VALID_NEON_STRUCT_MODE (mode1))
22074 && (VALID_NEON_DREG_MODE (mode2)
22075 || VALID_NEON_QREG_MODE (mode2)
22076 || VALID_NEON_STRUCT_MODE (mode2)))
22077 return true;
22079 return false;
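/* A sketch of the Neon case above (assuming the usual mode classification
   macros from arm.h): V2SImode is a valid D-register mode and TImode is a
   valid structure mode, so the second test allows them to be tied even
   though their mode classes differ.  */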
22082 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
22083 not used in ARM mode. */
22085 enum reg_class
22086 arm_regno_class (int regno)
22088 if (TARGET_THUMB1)
22090 if (regno == STACK_POINTER_REGNUM)
22091 return STACK_REG;
22092 if (regno == CC_REGNUM)
22093 return CC_REG;
22094 if (regno < 8)
22095 return LO_REGS;
22096 return HI_REGS;
22099 if (TARGET_THUMB2 && regno < 8)
22100 return LO_REGS;
22102 if ( regno <= LAST_ARM_REGNUM
22103 || regno == FRAME_POINTER_REGNUM
22104 || regno == ARG_POINTER_REGNUM)
22105 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22107 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22108 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22110 if (IS_VFP_REGNUM (regno))
22112 if (regno <= D7_VFP_REGNUM)
22113 return VFP_D0_D7_REGS;
22114 else if (regno <= LAST_LO_VFP_REGNUM)
22115 return VFP_LO_REGS;
22116 else
22117 return VFP_HI_REGS;
22120 if (IS_IWMMXT_REGNUM (regno))
22121 return IWMMXT_REGS;
22123 if (IS_IWMMXT_GR_REGNUM (regno))
22124 return IWMMXT_GR_REGS;
22126 return NO_REGS;
22129 /* Handle a special case when computing the offset
22130 of an argument from the frame pointer. */
22131 int
22132 arm_debugger_arg_offset (int value, rtx addr)
22134 rtx insn;
22136 /* We are only interested if dbxout_parms() failed to compute the offset. */
22137 if (value != 0)
22138 return 0;
22140 /* We can only cope with the case where the address is held in a register. */
22141 if (!REG_P (addr))
22142 return 0;
22144 /* If we are using the frame pointer to point at the argument, then
22145 an offset of 0 is correct. */
22146 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22147 return 0;
22149 /* If we are using the stack pointer to point at the
22150 argument, then an offset of 0 is correct. */
22151 /* ??? Check this is consistent with thumb2 frame layout. */
22152 if ((TARGET_THUMB || !frame_pointer_needed)
22153 && REGNO (addr) == SP_REGNUM)
22154 return 0;
22156 /* Oh dear. The argument is pointed to by a register rather
22157 than being held in a register, or being stored at a known
22158 offset from the frame pointer. Since GDB only understands
22159 those two kinds of argument we must translate the address
22160 held in the register into an offset from the frame pointer.
22161 We do this by searching through the insns for the function
22162 looking to see where this register gets its value. If the
22163 register is initialized from the frame pointer plus an offset
22164 then we are in luck and we can continue, otherwise we give up.
22166 This code is exercised by producing debugging information
22167 for a function with arguments like this:
22169 double func (double a, double b, int c, double d) {return d;}
22171 Without this code the stab for parameter 'd' will be set to
22172 an offset of 0 from the frame pointer, rather than 8. */
22174 /* The if() statement says:
22176 If the insn is a normal instruction
22177 and if the insn is setting the value in a register
22178 and if the register being set is the register holding the address of the argument
22179 and if the address is computed by an addition
22180 that involves adding to a register
22181 which is the frame pointer
22182 a constant integer
22184 then... */
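/* Sketch of the kind of insn the loop below matches (the pseudo register
   number is illustrative; the frame-pointer register depends on the target
   variant):

   (insn ... (set (reg:SI 3)
                  (plus:SI (reg:SI HARD_FRAME_POINTER_REGNUM)
                           (const_int 8))) ...)

   If ADDR is (reg:SI 3), VALUE becomes 8.  */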
22186 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22188 if ( NONJUMP_INSN_P (insn)
22189 && GET_CODE (PATTERN (insn)) == SET
22190 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22191 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22192 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22193 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22194 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22197 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22199 break;
22203 if (value == 0)
22205 debug_rtx (addr);
22206 warning (0, "unable to compute real location of stacked parameter");
22207 value = 8; /* XXX magic hack */
22210 return value;
22213 typedef enum {
22214 T_V8QI,
22215 T_V4HI,
22216 T_V4HF,
22217 T_V2SI,
22218 T_V2SF,
22219 T_DI,
22220 T_V16QI,
22221 T_V8HI,
22222 T_V4SI,
22223 T_V4SF,
22224 T_V2DI,
22225 T_TI,
22226 T_EI,
22227 T_OI,
22228 T_MAX /* Size of enum. Keep last. */
22229 } neon_builtin_type_mode;
22231 #define TYPE_MODE_BIT(X) (1 << (X))
22233 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22234 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22235 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22236 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22237 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22238 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22240 #define v8qi_UP T_V8QI
22241 #define v4hi_UP T_V4HI
22242 #define v4hf_UP T_V4HF
22243 #define v2si_UP T_V2SI
22244 #define v2sf_UP T_V2SF
22245 #define di_UP T_DI
22246 #define v16qi_UP T_V16QI
22247 #define v8hi_UP T_V8HI
22248 #define v4si_UP T_V4SI
22249 #define v4sf_UP T_V4SF
22250 #define v2di_UP T_V2DI
22251 #define ti_UP T_TI
22252 #define ei_UP T_EI
22253 #define oi_UP T_OI
22255 #define UP(X) X##_UP
22257 typedef enum {
22258 NEON_BINOP,
22259 NEON_TERNOP,
22260 NEON_UNOP,
22261 NEON_GETLANE,
22262 NEON_SETLANE,
22263 NEON_CREATE,
22264 NEON_RINT,
22265 NEON_DUP,
22266 NEON_DUPLANE,
22267 NEON_COMBINE,
22268 NEON_SPLIT,
22269 NEON_LANEMUL,
22270 NEON_LANEMULL,
22271 NEON_LANEMULH,
22272 NEON_LANEMAC,
22273 NEON_SCALARMUL,
22274 NEON_SCALARMULL,
22275 NEON_SCALARMULH,
22276 NEON_SCALARMAC,
22277 NEON_CONVERT,
22278 NEON_FLOAT_WIDEN,
22279 NEON_FLOAT_NARROW,
22280 NEON_FIXCONV,
22281 NEON_SELECT,
22282 NEON_RESULTPAIR,
22283 NEON_REINTERP,
22284 NEON_VTBL,
22285 NEON_VTBX,
22286 NEON_LOAD1,
22287 NEON_LOAD1LANE,
22288 NEON_STORE1,
22289 NEON_STORE1LANE,
22290 NEON_LOADSTRUCT,
22291 NEON_LOADSTRUCTLANE,
22292 NEON_STORESTRUCT,
22293 NEON_STORESTRUCTLANE,
22294 NEON_LOGICBINOP,
22295 NEON_SHIFTINSERT,
22296 NEON_SHIFTIMM,
22297 NEON_SHIFTACC
22298 } neon_itype;
22300 typedef struct {
22301 const char *name;
22302 const neon_itype itype;
22303 const neon_builtin_type_mode mode;
22304 const enum insn_code code;
22305 unsigned int fcode;
22306 } neon_builtin_datum;
22308 #define CF(N,X) CODE_FOR_neon_##N##X
22310 #define VAR1(T, N, A) \
22311 {#N, NEON_##T, UP (A), CF (N, A), 0}
22312 #define VAR2(T, N, A, B) \
22313 VAR1 (T, N, A), \
22314 {#N, NEON_##T, UP (B), CF (N, B), 0}
22315 #define VAR3(T, N, A, B, C) \
22316 VAR2 (T, N, A, B), \
22317 {#N, NEON_##T, UP (C), CF (N, C), 0}
22318 #define VAR4(T, N, A, B, C, D) \
22319 VAR3 (T, N, A, B, C), \
22320 {#N, NEON_##T, UP (D), CF (N, D), 0}
22321 #define VAR5(T, N, A, B, C, D, E) \
22322 VAR4 (T, N, A, B, C, D), \
22323 {#N, NEON_##T, UP (E), CF (N, E), 0}
22324 #define VAR6(T, N, A, B, C, D, E, F) \
22325 VAR5 (T, N, A, B, C, D, E), \
22326 {#N, NEON_##T, UP (F), CF (N, F), 0}
22327 #define VAR7(T, N, A, B, C, D, E, F, G) \
22328 VAR6 (T, N, A, B, C, D, E, F), \
22329 {#N, NEON_##T, UP (G), CF (N, G), 0}
22330 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22331 VAR7 (T, N, A, B, C, D, E, F, G), \
22332 {#N, NEON_##T, UP (H), CF (N, H), 0}
22333 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22334 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22335 {#N, NEON_##T, UP (I), CF (N, I), 0}
22336 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22337 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22338 {#N, NEON_##T, UP (J), CF (N, J), 0}
22340 /* The NEON builtin data can be found in arm_neon_builtins.def.
22341 The mode entries in the following table correspond to the "key" type of the
22342 instruction variant, i.e. equivalent to that which would be specified after
22343 the assembler mnemonic, which usually refers to the last vector operand.
22344 (Signed/unsigned/polynomial types are not differentiated, though; they are
22345 all mapped onto the same mode for a given element size.) The modes
22346 listed per instruction should be the same as those defined for that
22347 instruction's pattern in neon.md. */
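/* Illustrative expansion (using a hypothetical "vadd" entry): a line such
   as VAR2 (BINOP, vadd, v8qi, v4hi) in arm_neon_builtins.def expands here
   to

   {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
   {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0},

   with the fcode field filled in later by arm_init_neon_builtins.  */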
22349 static neon_builtin_datum neon_builtin_data[] =
22351 #include "arm_neon_builtins.def"
22354 #undef CF
22355 #undef VAR1
22356 #undef VAR2
22357 #undef VAR3
22358 #undef VAR4
22359 #undef VAR5
22360 #undef VAR6
22361 #undef VAR7
22362 #undef VAR8
22363 #undef VAR9
22364 #undef VAR10
22366 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22367 #define VAR1(T, N, A) \
22368 CF (N, A)
22369 #define VAR2(T, N, A, B) \
22370 VAR1 (T, N, A), \
22371 CF (N, B)
22372 #define VAR3(T, N, A, B, C) \
22373 VAR2 (T, N, A, B), \
22374 CF (N, C)
22375 #define VAR4(T, N, A, B, C, D) \
22376 VAR3 (T, N, A, B, C), \
22377 CF (N, D)
22378 #define VAR5(T, N, A, B, C, D, E) \
22379 VAR4 (T, N, A, B, C, D), \
22380 CF (N, E)
22381 #define VAR6(T, N, A, B, C, D, E, F) \
22382 VAR5 (T, N, A, B, C, D, E), \
22383 CF (N, F)
22384 #define VAR7(T, N, A, B, C, D, E, F, G) \
22385 VAR6 (T, N, A, B, C, D, E, F), \
22386 CF (N, G)
22387 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22388 VAR7 (T, N, A, B, C, D, E, F, G), \
22389 CF (N, H)
22390 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22391 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22392 CF (N, I)
22393 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22394 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22395 CF (N, J)
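/* With the redefinitions above, the same hypothetical line
   VAR2 (BINOP, vadd, v8qi, v4hi) expands inside enum arm_builtins to the
   enumerators ARM_BUILTIN_NEON_vaddv8qi and ARM_BUILTIN_NEON_vaddv4hi.  */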
22396 enum arm_builtins
22398 ARM_BUILTIN_GETWCGR0,
22399 ARM_BUILTIN_GETWCGR1,
22400 ARM_BUILTIN_GETWCGR2,
22401 ARM_BUILTIN_GETWCGR3,
22403 ARM_BUILTIN_SETWCGR0,
22404 ARM_BUILTIN_SETWCGR1,
22405 ARM_BUILTIN_SETWCGR2,
22406 ARM_BUILTIN_SETWCGR3,
22408 ARM_BUILTIN_WZERO,
22410 ARM_BUILTIN_WAVG2BR,
22411 ARM_BUILTIN_WAVG2HR,
22412 ARM_BUILTIN_WAVG2B,
22413 ARM_BUILTIN_WAVG2H,
22415 ARM_BUILTIN_WACCB,
22416 ARM_BUILTIN_WACCH,
22417 ARM_BUILTIN_WACCW,
22419 ARM_BUILTIN_WMACS,
22420 ARM_BUILTIN_WMACSZ,
22421 ARM_BUILTIN_WMACU,
22422 ARM_BUILTIN_WMACUZ,
22424 ARM_BUILTIN_WSADB,
22425 ARM_BUILTIN_WSADBZ,
22426 ARM_BUILTIN_WSADH,
22427 ARM_BUILTIN_WSADHZ,
22429 ARM_BUILTIN_WALIGNI,
22430 ARM_BUILTIN_WALIGNR0,
22431 ARM_BUILTIN_WALIGNR1,
22432 ARM_BUILTIN_WALIGNR2,
22433 ARM_BUILTIN_WALIGNR3,
22435 ARM_BUILTIN_TMIA,
22436 ARM_BUILTIN_TMIAPH,
22437 ARM_BUILTIN_TMIABB,
22438 ARM_BUILTIN_TMIABT,
22439 ARM_BUILTIN_TMIATB,
22440 ARM_BUILTIN_TMIATT,
22442 ARM_BUILTIN_TMOVMSKB,
22443 ARM_BUILTIN_TMOVMSKH,
22444 ARM_BUILTIN_TMOVMSKW,
22446 ARM_BUILTIN_TBCSTB,
22447 ARM_BUILTIN_TBCSTH,
22448 ARM_BUILTIN_TBCSTW,
22450 ARM_BUILTIN_WMADDS,
22451 ARM_BUILTIN_WMADDU,
22453 ARM_BUILTIN_WPACKHSS,
22454 ARM_BUILTIN_WPACKWSS,
22455 ARM_BUILTIN_WPACKDSS,
22456 ARM_BUILTIN_WPACKHUS,
22457 ARM_BUILTIN_WPACKWUS,
22458 ARM_BUILTIN_WPACKDUS,
22460 ARM_BUILTIN_WADDB,
22461 ARM_BUILTIN_WADDH,
22462 ARM_BUILTIN_WADDW,
22463 ARM_BUILTIN_WADDSSB,
22464 ARM_BUILTIN_WADDSSH,
22465 ARM_BUILTIN_WADDSSW,
22466 ARM_BUILTIN_WADDUSB,
22467 ARM_BUILTIN_WADDUSH,
22468 ARM_BUILTIN_WADDUSW,
22469 ARM_BUILTIN_WSUBB,
22470 ARM_BUILTIN_WSUBH,
22471 ARM_BUILTIN_WSUBW,
22472 ARM_BUILTIN_WSUBSSB,
22473 ARM_BUILTIN_WSUBSSH,
22474 ARM_BUILTIN_WSUBSSW,
22475 ARM_BUILTIN_WSUBUSB,
22476 ARM_BUILTIN_WSUBUSH,
22477 ARM_BUILTIN_WSUBUSW,
22479 ARM_BUILTIN_WAND,
22480 ARM_BUILTIN_WANDN,
22481 ARM_BUILTIN_WOR,
22482 ARM_BUILTIN_WXOR,
22484 ARM_BUILTIN_WCMPEQB,
22485 ARM_BUILTIN_WCMPEQH,
22486 ARM_BUILTIN_WCMPEQW,
22487 ARM_BUILTIN_WCMPGTUB,
22488 ARM_BUILTIN_WCMPGTUH,
22489 ARM_BUILTIN_WCMPGTUW,
22490 ARM_BUILTIN_WCMPGTSB,
22491 ARM_BUILTIN_WCMPGTSH,
22492 ARM_BUILTIN_WCMPGTSW,
22494 ARM_BUILTIN_TEXTRMSB,
22495 ARM_BUILTIN_TEXTRMSH,
22496 ARM_BUILTIN_TEXTRMSW,
22497 ARM_BUILTIN_TEXTRMUB,
22498 ARM_BUILTIN_TEXTRMUH,
22499 ARM_BUILTIN_TEXTRMUW,
22500 ARM_BUILTIN_TINSRB,
22501 ARM_BUILTIN_TINSRH,
22502 ARM_BUILTIN_TINSRW,
22504 ARM_BUILTIN_WMAXSW,
22505 ARM_BUILTIN_WMAXSH,
22506 ARM_BUILTIN_WMAXSB,
22507 ARM_BUILTIN_WMAXUW,
22508 ARM_BUILTIN_WMAXUH,
22509 ARM_BUILTIN_WMAXUB,
22510 ARM_BUILTIN_WMINSW,
22511 ARM_BUILTIN_WMINSH,
22512 ARM_BUILTIN_WMINSB,
22513 ARM_BUILTIN_WMINUW,
22514 ARM_BUILTIN_WMINUH,
22515 ARM_BUILTIN_WMINUB,
22517 ARM_BUILTIN_WMULUM,
22518 ARM_BUILTIN_WMULSM,
22519 ARM_BUILTIN_WMULUL,
22521 ARM_BUILTIN_PSADBH,
22522 ARM_BUILTIN_WSHUFH,
22524 ARM_BUILTIN_WSLLH,
22525 ARM_BUILTIN_WSLLW,
22526 ARM_BUILTIN_WSLLD,
22527 ARM_BUILTIN_WSRAH,
22528 ARM_BUILTIN_WSRAW,
22529 ARM_BUILTIN_WSRAD,
22530 ARM_BUILTIN_WSRLH,
22531 ARM_BUILTIN_WSRLW,
22532 ARM_BUILTIN_WSRLD,
22533 ARM_BUILTIN_WRORH,
22534 ARM_BUILTIN_WRORW,
22535 ARM_BUILTIN_WRORD,
22536 ARM_BUILTIN_WSLLHI,
22537 ARM_BUILTIN_WSLLWI,
22538 ARM_BUILTIN_WSLLDI,
22539 ARM_BUILTIN_WSRAHI,
22540 ARM_BUILTIN_WSRAWI,
22541 ARM_BUILTIN_WSRADI,
22542 ARM_BUILTIN_WSRLHI,
22543 ARM_BUILTIN_WSRLWI,
22544 ARM_BUILTIN_WSRLDI,
22545 ARM_BUILTIN_WRORHI,
22546 ARM_BUILTIN_WRORWI,
22547 ARM_BUILTIN_WRORDI,
22549 ARM_BUILTIN_WUNPCKIHB,
22550 ARM_BUILTIN_WUNPCKIHH,
22551 ARM_BUILTIN_WUNPCKIHW,
22552 ARM_BUILTIN_WUNPCKILB,
22553 ARM_BUILTIN_WUNPCKILH,
22554 ARM_BUILTIN_WUNPCKILW,
22556 ARM_BUILTIN_WUNPCKEHSB,
22557 ARM_BUILTIN_WUNPCKEHSH,
22558 ARM_BUILTIN_WUNPCKEHSW,
22559 ARM_BUILTIN_WUNPCKEHUB,
22560 ARM_BUILTIN_WUNPCKEHUH,
22561 ARM_BUILTIN_WUNPCKEHUW,
22562 ARM_BUILTIN_WUNPCKELSB,
22563 ARM_BUILTIN_WUNPCKELSH,
22564 ARM_BUILTIN_WUNPCKELSW,
22565 ARM_BUILTIN_WUNPCKELUB,
22566 ARM_BUILTIN_WUNPCKELUH,
22567 ARM_BUILTIN_WUNPCKELUW,
22569 ARM_BUILTIN_WABSB,
22570 ARM_BUILTIN_WABSH,
22571 ARM_BUILTIN_WABSW,
22573 ARM_BUILTIN_WADDSUBHX,
22574 ARM_BUILTIN_WSUBADDHX,
22576 ARM_BUILTIN_WABSDIFFB,
22577 ARM_BUILTIN_WABSDIFFH,
22578 ARM_BUILTIN_WABSDIFFW,
22580 ARM_BUILTIN_WADDCH,
22581 ARM_BUILTIN_WADDCW,
22583 ARM_BUILTIN_WAVG4,
22584 ARM_BUILTIN_WAVG4R,
22586 ARM_BUILTIN_WMADDSX,
22587 ARM_BUILTIN_WMADDUX,
22589 ARM_BUILTIN_WMADDSN,
22590 ARM_BUILTIN_WMADDUN,
22592 ARM_BUILTIN_WMULWSM,
22593 ARM_BUILTIN_WMULWUM,
22595 ARM_BUILTIN_WMULWSMR,
22596 ARM_BUILTIN_WMULWUMR,
22598 ARM_BUILTIN_WMULWL,
22600 ARM_BUILTIN_WMULSMR,
22601 ARM_BUILTIN_WMULUMR,
22603 ARM_BUILTIN_WQMULM,
22604 ARM_BUILTIN_WQMULMR,
22606 ARM_BUILTIN_WQMULWM,
22607 ARM_BUILTIN_WQMULWMR,
22609 ARM_BUILTIN_WADDBHUSM,
22610 ARM_BUILTIN_WADDBHUSL,
22612 ARM_BUILTIN_WQMIABB,
22613 ARM_BUILTIN_WQMIABT,
22614 ARM_BUILTIN_WQMIATB,
22615 ARM_BUILTIN_WQMIATT,
22617 ARM_BUILTIN_WQMIABBN,
22618 ARM_BUILTIN_WQMIABTN,
22619 ARM_BUILTIN_WQMIATBN,
22620 ARM_BUILTIN_WQMIATTN,
22622 ARM_BUILTIN_WMIABB,
22623 ARM_BUILTIN_WMIABT,
22624 ARM_BUILTIN_WMIATB,
22625 ARM_BUILTIN_WMIATT,
22627 ARM_BUILTIN_WMIABBN,
22628 ARM_BUILTIN_WMIABTN,
22629 ARM_BUILTIN_WMIATBN,
22630 ARM_BUILTIN_WMIATTN,
22632 ARM_BUILTIN_WMIAWBB,
22633 ARM_BUILTIN_WMIAWBT,
22634 ARM_BUILTIN_WMIAWTB,
22635 ARM_BUILTIN_WMIAWTT,
22637 ARM_BUILTIN_WMIAWBBN,
22638 ARM_BUILTIN_WMIAWBTN,
22639 ARM_BUILTIN_WMIAWTBN,
22640 ARM_BUILTIN_WMIAWTTN,
22642 ARM_BUILTIN_WMERGE,
22644 #include "arm_neon_builtins.def"
22646 ,ARM_BUILTIN_MAX
22649 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
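/* Consequence of the definition above: the Neon builtins occupy the top of
   the function-code space, so neon_builtin_data[i] is registered with fcode
   ARM_BUILTIN_NEON_BASE + i by arm_init_neon_builtins below, and
   neon_builtin_data[0] gets ARM_BUILTIN_NEON_BASE itself.  */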
22651 #undef CF
22652 #undef VAR1
22653 #undef VAR2
22654 #undef VAR3
22655 #undef VAR4
22656 #undef VAR5
22657 #undef VAR6
22658 #undef VAR7
22659 #undef VAR8
22660 #undef VAR9
22661 #undef VAR10
22663 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
22665 static void
22666 arm_init_neon_builtins (void)
22668 unsigned int i, fcode;
22669 tree decl;
22671 tree neon_intQI_type_node;
22672 tree neon_intHI_type_node;
22673 tree neon_floatHF_type_node;
22674 tree neon_polyQI_type_node;
22675 tree neon_polyHI_type_node;
22676 tree neon_intSI_type_node;
22677 tree neon_intDI_type_node;
22678 tree neon_float_type_node;
22680 tree intQI_pointer_node;
22681 tree intHI_pointer_node;
22682 tree intSI_pointer_node;
22683 tree intDI_pointer_node;
22684 tree float_pointer_node;
22686 tree const_intQI_node;
22687 tree const_intHI_node;
22688 tree const_intSI_node;
22689 tree const_intDI_node;
22690 tree const_float_node;
22692 tree const_intQI_pointer_node;
22693 tree const_intHI_pointer_node;
22694 tree const_intSI_pointer_node;
22695 tree const_intDI_pointer_node;
22696 tree const_float_pointer_node;
22698 tree V8QI_type_node;
22699 tree V4HI_type_node;
22700 tree V4HF_type_node;
22701 tree V2SI_type_node;
22702 tree V2SF_type_node;
22703 tree V16QI_type_node;
22704 tree V8HI_type_node;
22705 tree V4SI_type_node;
22706 tree V4SF_type_node;
22707 tree V2DI_type_node;
22709 tree intUQI_type_node;
22710 tree intUHI_type_node;
22711 tree intUSI_type_node;
22712 tree intUDI_type_node;
22714 tree intEI_type_node;
22715 tree intOI_type_node;
22716 tree intCI_type_node;
22717 tree intXI_type_node;
22719 tree V8QI_pointer_node;
22720 tree V4HI_pointer_node;
22721 tree V2SI_pointer_node;
22722 tree V2SF_pointer_node;
22723 tree V16QI_pointer_node;
22724 tree V8HI_pointer_node;
22725 tree V4SI_pointer_node;
22726 tree V4SF_pointer_node;
22727 tree V2DI_pointer_node;
22729 tree void_ftype_pv8qi_v8qi_v8qi;
22730 tree void_ftype_pv4hi_v4hi_v4hi;
22731 tree void_ftype_pv2si_v2si_v2si;
22732 tree void_ftype_pv2sf_v2sf_v2sf;
22733 tree void_ftype_pdi_di_di;
22734 tree void_ftype_pv16qi_v16qi_v16qi;
22735 tree void_ftype_pv8hi_v8hi_v8hi;
22736 tree void_ftype_pv4si_v4si_v4si;
22737 tree void_ftype_pv4sf_v4sf_v4sf;
22738 tree void_ftype_pv2di_v2di_v2di;
22740 tree reinterp_ftype_dreg[5][5];
22741 tree reinterp_ftype_qreg[5][5];
22742 tree dreg_types[5], qreg_types[5];
22744 /* Create distinguished type nodes for NEON vector element types,
22745 and pointers to values of such types, so we can detect them later. */
22746 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
22747 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
22748 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
22749 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
22750 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
22751 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
22752 neon_float_type_node = make_node (REAL_TYPE);
22753 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
22754 layout_type (neon_float_type_node);
22755 neon_floatHF_type_node = make_node (REAL_TYPE);
22756 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
22757 layout_type (neon_floatHF_type_node);
22759 /* Define typedefs which exactly correspond to the modes we are basing vector
22760 types on. If you change these names you'll need to change
22761 the table used by arm_mangle_type too. */
22762 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
22763 "__builtin_neon_qi");
22764 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
22765 "__builtin_neon_hi");
22766 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
22767 "__builtin_neon_hf");
22768 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
22769 "__builtin_neon_si");
22770 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
22771 "__builtin_neon_sf");
22772 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
22773 "__builtin_neon_di");
22774 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
22775 "__builtin_neon_poly8");
22776 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
22777 "__builtin_neon_poly16");
22779 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
22780 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
22781 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
22782 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
22783 float_pointer_node = build_pointer_type (neon_float_type_node);
22785 /* Next create constant-qualified versions of the above types. */
22786 const_intQI_node = build_qualified_type (neon_intQI_type_node,
22787 TYPE_QUAL_CONST);
22788 const_intHI_node = build_qualified_type (neon_intHI_type_node,
22789 TYPE_QUAL_CONST);
22790 const_intSI_node = build_qualified_type (neon_intSI_type_node,
22791 TYPE_QUAL_CONST);
22792 const_intDI_node = build_qualified_type (neon_intDI_type_node,
22793 TYPE_QUAL_CONST);
22794 const_float_node = build_qualified_type (neon_float_type_node,
22795 TYPE_QUAL_CONST);
22797 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
22798 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
22799 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
22800 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
22801 const_float_pointer_node = build_pointer_type (const_float_node);
22803 /* Now create vector types based on our NEON element types. */
22804 /* 64-bit vectors. */
22805 V8QI_type_node =
22806 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
22807 V4HI_type_node =
22808 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
22809 V4HF_type_node =
22810 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
22811 V2SI_type_node =
22812 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
22813 V2SF_type_node =
22814 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
22815 /* 128-bit vectors. */
22816 V16QI_type_node =
22817 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
22818 V8HI_type_node =
22819 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
22820 V4SI_type_node =
22821 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
22822 V4SF_type_node =
22823 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
22824 V2DI_type_node =
22825 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
22827 /* Unsigned integer types for various mode sizes. */
22828 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
22829 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
22830 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
22831 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
22833 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
22834 "__builtin_neon_uqi");
22835 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
22836 "__builtin_neon_uhi");
22837 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
22838 "__builtin_neon_usi");
22839 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
22840 "__builtin_neon_udi");
22842 /* Opaque integer types for structures of vectors. */
22843 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
22844 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
22845 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
22846 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
22848 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
22849 "__builtin_neon_ti");
22850 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
22851 "__builtin_neon_ei");
22852 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
22853 "__builtin_neon_oi");
22854 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
22855 "__builtin_neon_ci");
22856 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
22857 "__builtin_neon_xi");
22859 /* Pointers to vector types. */
22860 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
22861 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
22862 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
22863 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
22864 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
22865 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
22866 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
22867 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
22868 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
22870 /* Operations which return results as pairs. */
22871 void_ftype_pv8qi_v8qi_v8qi =
22872 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
22873 V8QI_type_node, NULL);
22874 void_ftype_pv4hi_v4hi_v4hi =
22875 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
22876 V4HI_type_node, NULL);
22877 void_ftype_pv2si_v2si_v2si =
22878 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
22879 V2SI_type_node, NULL);
22880 void_ftype_pv2sf_v2sf_v2sf =
22881 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
22882 V2SF_type_node, NULL);
22883 void_ftype_pdi_di_di =
22884 build_function_type_list (void_type_node, intDI_pointer_node,
22885 neon_intDI_type_node, neon_intDI_type_node, NULL);
22886 void_ftype_pv16qi_v16qi_v16qi =
22887 build_function_type_list (void_type_node, V16QI_pointer_node,
22888 V16QI_type_node, V16QI_type_node, NULL);
22889 void_ftype_pv8hi_v8hi_v8hi =
22890 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
22891 V8HI_type_node, NULL);
22892 void_ftype_pv4si_v4si_v4si =
22893 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
22894 V4SI_type_node, NULL);
22895 void_ftype_pv4sf_v4sf_v4sf =
22896 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
22897 V4SF_type_node, NULL);
22898 void_ftype_pv2di_v2di_v2di =
22899 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
22900 V2DI_type_node, NULL);
22902 dreg_types[0] = V8QI_type_node;
22903 dreg_types[1] = V4HI_type_node;
22904 dreg_types[2] = V2SI_type_node;
22905 dreg_types[3] = V2SF_type_node;
22906 dreg_types[4] = neon_intDI_type_node;
22908 qreg_types[0] = V16QI_type_node;
22909 qreg_types[1] = V8HI_type_node;
22910 qreg_types[2] = V4SI_type_node;
22911 qreg_types[3] = V4SF_type_node;
22912 qreg_types[4] = V2DI_type_node;
22914 for (i = 0; i < 5; i++)
22916 int j;
22917 for (j = 0; j < 5; j++)
22919 reinterp_ftype_dreg[i][j]
22920 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
22921 reinterp_ftype_qreg[i][j]
22922 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
22926 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
22927 i < ARRAY_SIZE (neon_builtin_data);
22928 i++, fcode++)
22930 neon_builtin_datum *d = &neon_builtin_data[i];
22932 const char* const modenames[] = {
22933 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
22934 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
22935 "ti", "ei", "oi"
22937 char namebuf[60];
22938 tree ftype = NULL;
22939 int is_load = 0, is_store = 0;
22941 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
22943 d->fcode = fcode;
22945 switch (d->itype)
22947 case NEON_LOAD1:
22948 case NEON_LOAD1LANE:
22949 case NEON_LOADSTRUCT:
22950 case NEON_LOADSTRUCTLANE:
22951 is_load = 1;
22952 /* Fall through. */
22953 case NEON_STORE1:
22954 case NEON_STORE1LANE:
22955 case NEON_STORESTRUCT:
22956 case NEON_STORESTRUCTLANE:
22957 if (!is_load)
22958 is_store = 1;
22959 /* Fall through. */
22960 case NEON_UNOP:
22961 case NEON_RINT:
22962 case NEON_BINOP:
22963 case NEON_LOGICBINOP:
22964 case NEON_SHIFTINSERT:
22965 case NEON_TERNOP:
22966 case NEON_GETLANE:
22967 case NEON_SETLANE:
22968 case NEON_CREATE:
22969 case NEON_DUP:
22970 case NEON_DUPLANE:
22971 case NEON_SHIFTIMM:
22972 case NEON_SHIFTACC:
22973 case NEON_COMBINE:
22974 case NEON_SPLIT:
22975 case NEON_CONVERT:
22976 case NEON_FIXCONV:
22977 case NEON_LANEMUL:
22978 case NEON_LANEMULL:
22979 case NEON_LANEMULH:
22980 case NEON_LANEMAC:
22981 case NEON_SCALARMUL:
22982 case NEON_SCALARMULL:
22983 case NEON_SCALARMULH:
22984 case NEON_SCALARMAC:
22985 case NEON_SELECT:
22986 case NEON_VTBL:
22987 case NEON_VTBX:
22989 int k;
22990 tree return_type = void_type_node, args = void_list_node;
22992 /* Build a function type directly from the insn_data for
22993 this builtin. The build_function_type() function takes
22994 care of removing duplicates for us. */
22995 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
22997 tree eltype;
22999 if (is_load && k == 1)
23001 /* Neon load patterns always have the memory
23002 operand in the operand 1 position. */
23003 gcc_assert (insn_data[d->code].operand[k].predicate
23004 == neon_struct_operand);
23006 switch (d->mode)
23008 case T_V8QI:
23009 case T_V16QI:
23010 eltype = const_intQI_pointer_node;
23011 break;
23013 case T_V4HI:
23014 case T_V8HI:
23015 eltype = const_intHI_pointer_node;
23016 break;
23018 case T_V2SI:
23019 case T_V4SI:
23020 eltype = const_intSI_pointer_node;
23021 break;
23023 case T_V2SF:
23024 case T_V4SF:
23025 eltype = const_float_pointer_node;
23026 break;
23028 case T_DI:
23029 case T_V2DI:
23030 eltype = const_intDI_pointer_node;
23031 break;
23033 default: gcc_unreachable ();
23036 else if (is_store && k == 0)
23038 /* Similarly, Neon store patterns use operand 0 as
23039 the memory location to store to. */
23040 gcc_assert (insn_data[d->code].operand[k].predicate
23041 == neon_struct_operand);
23043 switch (d->mode)
23045 case T_V8QI:
23046 case T_V16QI:
23047 eltype = intQI_pointer_node;
23048 break;
23050 case T_V4HI:
23051 case T_V8HI:
23052 eltype = intHI_pointer_node;
23053 break;
23055 case T_V2SI:
23056 case T_V4SI:
23057 eltype = intSI_pointer_node;
23058 break;
23060 case T_V2SF:
23061 case T_V4SF:
23062 eltype = float_pointer_node;
23063 break;
23065 case T_DI:
23066 case T_V2DI:
23067 eltype = intDI_pointer_node;
23068 break;
23070 default: gcc_unreachable ();
23073 else
23075 switch (insn_data[d->code].operand[k].mode)
23077 case VOIDmode: eltype = void_type_node; break;
23078 /* Scalars. */
23079 case QImode: eltype = neon_intQI_type_node; break;
23080 case HImode: eltype = neon_intHI_type_node; break;
23081 case SImode: eltype = neon_intSI_type_node; break;
23082 case SFmode: eltype = neon_float_type_node; break;
23083 case DImode: eltype = neon_intDI_type_node; break;
23084 case TImode: eltype = intTI_type_node; break;
23085 case EImode: eltype = intEI_type_node; break;
23086 case OImode: eltype = intOI_type_node; break;
23087 case CImode: eltype = intCI_type_node; break;
23088 case XImode: eltype = intXI_type_node; break;
23089 /* 64-bit vectors. */
23090 case V8QImode: eltype = V8QI_type_node; break;
23091 case V4HImode: eltype = V4HI_type_node; break;
23092 case V2SImode: eltype = V2SI_type_node; break;
23093 case V2SFmode: eltype = V2SF_type_node; break;
23094 /* 128-bit vectors. */
23095 case V16QImode: eltype = V16QI_type_node; break;
23096 case V8HImode: eltype = V8HI_type_node; break;
23097 case V4SImode: eltype = V4SI_type_node; break;
23098 case V4SFmode: eltype = V4SF_type_node; break;
23099 case V2DImode: eltype = V2DI_type_node; break;
23100 default: gcc_unreachable ();
23104 if (k == 0 && !is_store)
23105 return_type = eltype;
23106 else
23107 args = tree_cons (NULL_TREE, eltype, args);
23110 ftype = build_function_type (return_type, args);
23112 break;
23114 case NEON_RESULTPAIR:
23116 switch (insn_data[d->code].operand[1].mode)
23118 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23119 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23120 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23121 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23122 case DImode: ftype = void_ftype_pdi_di_di; break;
23123 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23124 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23125 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23126 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23127 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23128 default: gcc_unreachable ();
23131 break;
23133 case NEON_REINTERP:
23135 /* We iterate over 5 doubleword types, then 5 quadword
23136 types. V4HF is not a type used in reinterpret, so we translate
23137 d->mode to the correct index in reinterp_ftype_dreg. */
23138 int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
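/* For instance, a REINTERP builtin keyed on T_V2SI (which follows T_V4HF
   in neon_builtin_type_mode) gives rhs == (3 - 1) % 5 == 2, so the
   argument type is dreg_types[2] (V2SI_type_node) for a D-register result
   or qreg_types[2] (V4SI_type_node) for a Q-register result.  */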
23139 switch (insn_data[d->code].operand[0].mode)
23141 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23142 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23143 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23144 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23145 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23146 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23147 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23148 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23149 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23150 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23151 default: gcc_unreachable ();
23154 break;
23155 case NEON_FLOAT_WIDEN:
23157 tree eltype = NULL_TREE;
23158 tree return_type = NULL_TREE;
23160 switch (insn_data[d->code].operand[1].mode)
23162 case V4HFmode:
23163 eltype = V4HF_type_node;
23164 return_type = V4SF_type_node;
23165 break;
23166 default: gcc_unreachable ();
23168 ftype = build_function_type_list (return_type, eltype, NULL);
23169 break;
23171 case NEON_FLOAT_NARROW:
23173 tree eltype = NULL_TREE;
23174 tree return_type = NULL_TREE;
23176 switch (insn_data[d->code].operand[1].mode)
23178 case V4SFmode:
23179 eltype = V4SF_type_node;
23180 return_type = V4HF_type_node;
23181 break;
23182 default: gcc_unreachable ();
23184 ftype = build_function_type_list (return_type, eltype, NULL);
23185 break;
23187 default:
23188 gcc_unreachable ();
23191 gcc_assert (ftype != NULL);
23193 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
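/* E.g. for the hypothetical entry {"vadd", NEON_BINOP, T_V8QI, ...} used
   in the examples above, namebuf becomes "__builtin_neon_vaddv8qi".  */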
23195 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23196 NULL_TREE);
23197 arm_builtin_decls[fcode] = decl;
23201 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23202 do \
23204 if ((MASK) & insn_flags) \
23206 tree bdecl; \
23207 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23208 BUILT_IN_MD, NULL, NULL_TREE); \
23209 arm_builtin_decls[CODE] = bdecl; \
23212 while (0)
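/* Sketch of how the macro is used later in this file (the arguments shown
   are illustrative rather than copied from the source):

   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero",
                 di_ftype_void, ARM_BUILTIN_WZERO);

   The builtin is only registered when the corresponding feature bit is
   present in insn_flags.  */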
23214 struct builtin_description
23216 const unsigned int mask;
23217 const enum insn_code icode;
23218 const char * const name;
23219 const enum arm_builtins code;
23220 const enum rtx_code comparison;
23221 const unsigned int flag;
23224 static const struct builtin_description bdesc_2arg[] =
23226 #define IWMMXT_BUILTIN(code, string, builtin) \
23227 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23228 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23230 #define IWMMXT2_BUILTIN(code, string, builtin) \
23231 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23232 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
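/* Illustrative expansion of the first entry below:
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) becomes

   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   matching the field order of struct builtin_description above.  */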
23234 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23235 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23236 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23237 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23238 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23239 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23240 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23241 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23242 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23243 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23244 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23245 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23246 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23247 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23248 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23249 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23250 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23251 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23252 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23253 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23254 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23255 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23256 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23257 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23258 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23259 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23260 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23261 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23262 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23263 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23264 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23265 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23266 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23267 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23268 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23269 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23270 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23271 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23272 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23273 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23274 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23275 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23276 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23277 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23278 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23279 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23280 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23281 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23282 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23283 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23284 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23285 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23286 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23287 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23288 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23289 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23290 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23291 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23292 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23293 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23294 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23295 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23296 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23297 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23298 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23299 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23300 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23301 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23302 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23303 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23304 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23305 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23306 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23307 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23308 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23309 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23310 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23311 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23313 #define IWMMXT_BUILTIN2(code, builtin) \
23314 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23316 #define IWMMXT2_BUILTIN2(code, builtin) \
23317 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23319 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23320 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23321 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23322 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23323 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23324 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23325 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23326 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23327 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23328 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23331 static const struct builtin_description bdesc_1arg[] =
23333 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
23334 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
23335 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
23336 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
23337 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
23338 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
23339 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
23340 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
23341 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
23342 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
23343 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
23344 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
23345 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
23346 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
23347 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
23348 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
23349 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
23350 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
23351 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
23352 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
23353 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
23354 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
23355 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
23356 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
23359 /* Set up all the iWMMXt builtins. This is not called if
23360 TARGET_IWMMXT is zero. */
23362 static void
23363 arm_init_iwmmxt_builtins (void)
23365 const struct builtin_description * d;
23366 size_t i;
23368 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
23369 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
23370 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
23372 tree v8qi_ftype_v8qi_v8qi_int
23373 = build_function_type_list (V8QI_type_node,
23374 V8QI_type_node, V8QI_type_node,
23375 integer_type_node, NULL_TREE);
23376 tree v4hi_ftype_v4hi_int
23377 = build_function_type_list (V4HI_type_node,
23378 V4HI_type_node, integer_type_node, NULL_TREE);
23379 tree v2si_ftype_v2si_int
23380 = build_function_type_list (V2SI_type_node,
23381 V2SI_type_node, integer_type_node, NULL_TREE);
23382 tree v2si_ftype_di_di
23383 = build_function_type_list (V2SI_type_node,
23384 long_long_integer_type_node,
23385 long_long_integer_type_node,
23386 NULL_TREE);
23387 tree di_ftype_di_int
23388 = build_function_type_list (long_long_integer_type_node,
23389 long_long_integer_type_node,
23390 integer_type_node, NULL_TREE);
23391 tree di_ftype_di_int_int
23392 = build_function_type_list (long_long_integer_type_node,
23393 long_long_integer_type_node,
23394 integer_type_node,
23395 integer_type_node, NULL_TREE);
23396 tree int_ftype_v8qi
23397 = build_function_type_list (integer_type_node,
23398 V8QI_type_node, NULL_TREE);
23399 tree int_ftype_v4hi
23400 = build_function_type_list (integer_type_node,
23401 V4HI_type_node, NULL_TREE);
23402 tree int_ftype_v2si
23403 = build_function_type_list (integer_type_node,
23404 V2SI_type_node, NULL_TREE);
23405 tree int_ftype_v8qi_int
23406 = build_function_type_list (integer_type_node,
23407 V8QI_type_node, integer_type_node, NULL_TREE);
23408 tree int_ftype_v4hi_int
23409 = build_function_type_list (integer_type_node,
23410 V4HI_type_node, integer_type_node, NULL_TREE);
23411 tree int_ftype_v2si_int
23412 = build_function_type_list (integer_type_node,
23413 V2SI_type_node, integer_type_node, NULL_TREE);
23414 tree v8qi_ftype_v8qi_int_int
23415 = build_function_type_list (V8QI_type_node,
23416 V8QI_type_node, integer_type_node,
23417 integer_type_node, NULL_TREE);
23418 tree v4hi_ftype_v4hi_int_int
23419 = build_function_type_list (V4HI_type_node,
23420 V4HI_type_node, integer_type_node,
23421 integer_type_node, NULL_TREE);
23422 tree v2si_ftype_v2si_int_int
23423 = build_function_type_list (V2SI_type_node,
23424 V2SI_type_node, integer_type_node,
23425 integer_type_node, NULL_TREE);
23426 /* Miscellaneous. */
23427 tree v8qi_ftype_v4hi_v4hi
23428 = build_function_type_list (V8QI_type_node,
23429 V4HI_type_node, V4HI_type_node, NULL_TREE);
23430 tree v4hi_ftype_v2si_v2si
23431 = build_function_type_list (V4HI_type_node,
23432 V2SI_type_node, V2SI_type_node, NULL_TREE);
23433 tree v8qi_ftype_v4hi_v8qi
23434 = build_function_type_list (V8QI_type_node,
23435 V4HI_type_node, V8QI_type_node, NULL_TREE);
23436 tree v2si_ftype_v4hi_v4hi
23437 = build_function_type_list (V2SI_type_node,
23438 V4HI_type_node, V4HI_type_node, NULL_TREE);
23439 tree v2si_ftype_v8qi_v8qi
23440 = build_function_type_list (V2SI_type_node,
23441 V8QI_type_node, V8QI_type_node, NULL_TREE);
23442 tree v4hi_ftype_v4hi_di
23443 = build_function_type_list (V4HI_type_node,
23444 V4HI_type_node, long_long_integer_type_node,
23445 NULL_TREE);
23446 tree v2si_ftype_v2si_di
23447 = build_function_type_list (V2SI_type_node,
23448 V2SI_type_node, long_long_integer_type_node,
23449 NULL_TREE);
23450 tree di_ftype_void
23451 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
23452 tree int_ftype_void
23453 = build_function_type_list (integer_type_node, NULL_TREE);
23454 tree di_ftype_v8qi
23455 = build_function_type_list (long_long_integer_type_node,
23456 V8QI_type_node, NULL_TREE);
23457 tree di_ftype_v4hi
23458 = build_function_type_list (long_long_integer_type_node,
23459 V4HI_type_node, NULL_TREE);
23460 tree di_ftype_v2si
23461 = build_function_type_list (long_long_integer_type_node,
23462 V2SI_type_node, NULL_TREE);
23463 tree v2si_ftype_v4hi
23464 = build_function_type_list (V2SI_type_node,
23465 V4HI_type_node, NULL_TREE);
23466 tree v4hi_ftype_v8qi
23467 = build_function_type_list (V4HI_type_node,
23468 V8QI_type_node, NULL_TREE);
23469 tree v8qi_ftype_v8qi
23470 = build_function_type_list (V8QI_type_node,
23471 V8QI_type_node, NULL_TREE);
23472 tree v4hi_ftype_v4hi
23473 = build_function_type_list (V4HI_type_node,
23474 V4HI_type_node, NULL_TREE);
23475 tree v2si_ftype_v2si
23476 = build_function_type_list (V2SI_type_node,
23477 V2SI_type_node, NULL_TREE);
23479 tree di_ftype_di_v4hi_v4hi
23480 = build_function_type_list (long_long_unsigned_type_node,
23481 long_long_unsigned_type_node,
23482 V4HI_type_node, V4HI_type_node,
23483 NULL_TREE);
23485 tree di_ftype_v4hi_v4hi
23486 = build_function_type_list (long_long_unsigned_type_node,
23487 V4HI_type_node, V4HI_type_node,
23488 NULL_TREE);
23490 tree v2si_ftype_v2si_v4hi_v4hi
23491 = build_function_type_list (V2SI_type_node,
23492 V2SI_type_node, V4HI_type_node,
23493 V4HI_type_node, NULL_TREE);
23495 tree v2si_ftype_v2si_v8qi_v8qi
23496 = build_function_type_list (V2SI_type_node,
23497 V2SI_type_node, V8QI_type_node,
23498 V8QI_type_node, NULL_TREE);
23500 tree di_ftype_di_v2si_v2si
23501 = build_function_type_list (long_long_unsigned_type_node,
23502 long_long_unsigned_type_node,
23503 V2SI_type_node, V2SI_type_node,
23504 NULL_TREE);
23506 tree di_ftype_di_di_int
23507 = build_function_type_list (long_long_unsigned_type_node,
23508 long_long_unsigned_type_node,
23509 long_long_unsigned_type_node,
23510 integer_type_node, NULL_TREE);
23512 tree void_ftype_int
23513 = build_function_type_list (void_type_node,
23514 integer_type_node, NULL_TREE);
23516 tree v8qi_ftype_char
23517 = build_function_type_list (V8QI_type_node,
23518 signed_char_type_node, NULL_TREE);
23520 tree v4hi_ftype_short
23521 = build_function_type_list (V4HI_type_node,
23522 short_integer_type_node, NULL_TREE);
23524 tree v2si_ftype_int
23525 = build_function_type_list (V2SI_type_node,
23526 integer_type_node, NULL_TREE);
23528 /* Normal vector binops. */
23529 tree v8qi_ftype_v8qi_v8qi
23530 = build_function_type_list (V8QI_type_node,
23531 V8QI_type_node, V8QI_type_node, NULL_TREE);
23532 tree v4hi_ftype_v4hi_v4hi
23533 = build_function_type_list (V4HI_type_node,
23534 V4HI_type_node, V4HI_type_node, NULL_TREE);
23535 tree v2si_ftype_v2si_v2si
23536 = build_function_type_list (V2SI_type_node,
23537 V2SI_type_node, V2SI_type_node, NULL_TREE);
23538 tree di_ftype_di_di
23539 = build_function_type_list (long_long_unsigned_type_node,
23540 long_long_unsigned_type_node,
23541 long_long_unsigned_type_node,
23542 NULL_TREE);
23544 /* Add all builtins that are more or less simple operations on two
23545 operands. */
23546 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
23548 /* Use one of the operands; the target can have a different mode for
23549 mask-generating compares. */
23550 enum machine_mode mode;
23551 tree type;
23553 if (d->name == 0)
23554 continue;
23556 mode = insn_data[d->icode].operand[1].mode;
23558 switch (mode)
23560 case V8QImode:
23561 type = v8qi_ftype_v8qi_v8qi;
23562 break;
23563 case V4HImode:
23564 type = v4hi_ftype_v4hi_v4hi;
23565 break;
23566 case V2SImode:
23567 type = v2si_ftype_v2si_v2si;
23568 break;
23569 case DImode:
23570 type = di_ftype_di_di;
23571 break;
23573 default:
23574 gcc_unreachable ();
23577 def_mbuiltin (d->mask, d->name, type, d->code);
23580 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
23581 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
23582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
23583 ARM_BUILTIN_ ## CODE)
23585 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
23586 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
23587 ARM_BUILTIN_ ## CODE)
23589 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
23590 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
23591 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
23592 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
23593 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
23594 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
23595 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
23596 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
23597 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
23599 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
23600 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
23601 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
23602 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
23603 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
23604 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
23606 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
23607 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
23608 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
23609 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
23610 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
23611 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
23613 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
23614 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
23615 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
23616 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
23617 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
23618 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
23620 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
23621 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
23622 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
23623 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
23624 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
23625 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
23627 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
23629 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
23630 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
23631 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
23632 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
23633 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
23634 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
23635 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
23636 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
23637 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
23638 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
23640 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
23641 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
23642 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
23643 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
23644 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
23645 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
23646 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
23647 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
23648 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
23650 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
23651 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
23652 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
23654 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
23655 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
23656 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
23658 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
23659 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
23661 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
23662 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
23663 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
23664 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
23665 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
23666 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
23668 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
23669 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
23670 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
23671 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
23672 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
23673 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
23674 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
23675 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
23676 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
23677 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
23678 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
23679 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
23681 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
23682 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
23683 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
23684 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
23686 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
23687 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
23688 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
23689 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
23690 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
23691 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
23692 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
23694 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
23695 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
23696 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
23698 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
23699 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
23700 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
23701 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
23703 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
23704 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
23705 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
23706 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
23708 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
23709 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
23710 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
23711 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
23713 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
23714 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
23715 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
23716 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
23718 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
23719 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
23720 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
23721 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
23723 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
23724 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
23725 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
23726 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
23728 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
23730 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
23731 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
23732 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
23734 #undef iwmmx_mbuiltin
23735 #undef iwmmx2_mbuiltin
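/* Illustrative note (not part of the original sources): once the
   registrations above have run, each name is visible to user code as a
   "__builtin_arm_" function with the prototype given by its type tree.
   For example, assuming a compiler configured for iWMMXt
   (e.g. -mcpu=iwmmxt):

       long long z = __builtin_arm_wzero ();          // di_ftype_void
       long long s = __builtin_arm_wslldi (z, 3);     // di_ftype_di_int

   The user-level _mm_* intrinsics referenced by the diagnostics in
   arm_expand_builtin below are thin wrappers around these builtins
   provided by mmintrin.h.  */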
23738 static void
23739 arm_init_fp16_builtins (void)
23741 tree fp16_type = make_node (REAL_TYPE);
23742 TYPE_PRECISION (fp16_type) = 16;
23743 layout_type (fp16_type);
23744 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
23747 static void
23748 arm_init_builtins (void)
23750 if (TARGET_REALLY_IWMMXT)
23751 arm_init_iwmmxt_builtins ();
23753 if (TARGET_NEON)
23754 arm_init_neon_builtins ();
23756 if (arm_fp16_format)
23757 arm_init_fp16_builtins ();
23760 /* Return the ARM builtin for CODE. */
23762 static tree
23763 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23765 if (code >= ARM_BUILTIN_MAX)
23766 return error_mark_node;
23768 return arm_builtin_decls[code];
23771 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23773 static const char *
23774 arm_invalid_parameter_type (const_tree t)
23776 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23777 return N_("function parameters cannot have __fp16 type");
23778 return NULL;
23781 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23783 static const char *
23784 arm_invalid_return_type (const_tree t)
23786 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23787 return N_("functions cannot return __fp16 type");
23788 return NULL;
23791 /* Implement TARGET_PROMOTED_TYPE. */
23793 static tree
23794 arm_promoted_type (const_tree t)
23796 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23797 return float_type_node;
23798 return NULL_TREE;
23801 /* Implement TARGET_CONVERT_TO_TYPE.
23802 Specifically, this hook implements the peculiarity of the ARM
23803 half-precision floating-point C semantics that requires conversions between
23804 __fp16 to or from double to do an intermediate conversion to float. */
23806 static tree
23807 arm_convert_to_type (tree type, tree expr)
23809 tree fromtype = TREE_TYPE (expr);
23810 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23811 return NULL_TREE;
23812 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23813 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23814 return convert (type, convert (float_type_node, expr));
23815 return NULL_TREE;
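/* Illustrative note (added, not in the original comments): with this hook
   in place, a conversion that would otherwise skip float, such as

       __fp16 h = 1.0;          // requires -mfp16-format=ieee or similar
       double d = h;            // expanded as (double)(float)h

   is rewritten to go through an intermediate float, while conversions
   between __fp16 and float themselves are left to the normal optab
   machinery.  */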
23818 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23819 This simply adds HFmode as a supported mode; even though we don't
23820 implement arithmetic on this type directly, it's supported by
23821 optabs conversions, much the way the double-word arithmetic is
23822 special-cased in the default hook. */
23824 static bool
23825 arm_scalar_mode_supported_p (enum machine_mode mode)
23827 if (mode == HFmode)
23828 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23829 else if (ALL_FIXED_POINT_MODE_P (mode))
23830 return true;
23831 else
23832 return default_scalar_mode_supported_p (mode);
23835 /* Errors in the source file can cause expand_expr to return const0_rtx
23836 where we expect a vector. To avoid crashing, use one of the vector
23837 clear instructions. */
23839 static rtx
23840 safe_vector_operand (rtx x, enum machine_mode mode)
23842 if (x != const0_rtx)
23843 return x;
23844 x = gen_reg_rtx (mode);
23846 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
23847 : gen_rtx_SUBREG (DImode, x, 0)));
23848 return x;
23851 /* Subroutine of arm_expand_builtin to take care of binop insns. */
23853 static rtx
23854 arm_expand_binop_builtin (enum insn_code icode,
23855 tree exp, rtx target)
23857 rtx pat;
23858 tree arg0 = CALL_EXPR_ARG (exp, 0);
23859 tree arg1 = CALL_EXPR_ARG (exp, 1);
23860 rtx op0 = expand_normal (arg0);
23861 rtx op1 = expand_normal (arg1);
23862 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23863 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23864 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23866 if (VECTOR_MODE_P (mode0))
23867 op0 = safe_vector_operand (op0, mode0);
23868 if (VECTOR_MODE_P (mode1))
23869 op1 = safe_vector_operand (op1, mode1);
23871 if (! target
23872 || GET_MODE (target) != tmode
23873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23874 target = gen_reg_rtx (tmode);
23876 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
23877 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
23879 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23880 op0 = copy_to_mode_reg (mode0, op0);
23881 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
23882 op1 = copy_to_mode_reg (mode1, op1);
23884 pat = GEN_FCN (icode) (target, op0, op1);
23885 if (! pat)
23886 return 0;
23887 emit_insn (pat);
23888 return target;
23891 /* Subroutine of arm_expand_builtin to take care of unop insns. */
23893 static rtx
23894 arm_expand_unop_builtin (enum insn_code icode,
23895 tree exp, rtx target, int do_load)
23897 rtx pat;
23898 tree arg0 = CALL_EXPR_ARG (exp, 0);
23899 rtx op0 = expand_normal (arg0);
23900 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23901 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23903 if (! target
23904 || GET_MODE (target) != tmode
23905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23906 target = gen_reg_rtx (tmode);
23907 if (do_load)
23908 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
23909 else
23911 if (VECTOR_MODE_P (mode0))
23912 op0 = safe_vector_operand (op0, mode0);
23914 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23915 op0 = copy_to_mode_reg (mode0, op0);
23918 pat = GEN_FCN (icode) (target, op0);
23919 if (! pat)
23920 return 0;
23921 emit_insn (pat);
23922 return target;
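/* Note added for clarity: when DO_LOAD is nonzero the single operand is
   treated as an address rather than a value; it is forced into a
   pseudo-register and wrapped in a MEM of MODE0 before being handed to
   the instruction pattern.  */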
23925 typedef enum {
23926 NEON_ARG_COPY_TO_REG,
23927 NEON_ARG_CONSTANT,
23928 NEON_ARG_MEMORY,
23929 NEON_ARG_STOP
23930 } builtin_arg;
23932 #define NEON_MAX_BUILTIN_ARGS 5
23934 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
23935 and return an expression for the accessed memory.
23937 The intrinsic function operates on a block of registers that has
23938 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
23939 function references the memory at EXP of type TYPE and in mode
23940 MEM_MODE; this mode may be BLKmode if no more suitable mode is
23941 available. */
23943 static tree
23944 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
23945 enum machine_mode reg_mode,
23946 neon_builtin_type_mode type_mode)
23948 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
23949 tree elem_type, upper_bound, array_type;
23951 /* Work out the size of the register block in bytes. */
23952 reg_size = GET_MODE_SIZE (reg_mode);
23954 /* Work out the size of each vector in bytes. */
23955 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
23956 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
23958 /* Work out how many vectors there are. */
23959 gcc_assert (reg_size % vector_size == 0);
23960 nvectors = reg_size / vector_size;
23962 /* Work out the type of each element. */
23963 gcc_assert (POINTER_TYPE_P (type));
23964 elem_type = TREE_TYPE (type);
23966 /* Work out how many elements are being loaded or stored.
23967 MEM_MODE == REG_MODE implies a one-to-one mapping between register
23968 and memory elements; anything else implies a lane load or store. */
23969 if (mem_mode == reg_mode)
23970 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
23971 else
23972 nelems = nvectors;
23974 /* Create a type that describes the full access. */
23975 upper_bound = build_int_cst (size_type_node, nelems - 1);
23976 array_type = build_array_type (elem_type, build_index_type (upper_bound));
23978 /* Dereference EXP using that type. */
23979 return fold_build2 (MEM_REF, array_type, exp,
23980 build_int_cst (build_pointer_type (array_type), 0));
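/* Worked example (added for clarity): for a full-vector load such as the
   one behind vld1_s8, REG_MODE and MEM_MODE are both V8QImode and
   TYPE_MODE is a D-register mode, so vector_size == 8, nvectors == 1 and
   nelems == 8; the access is rewritten as a MEM_REF of an 8-element
   array of the pointed-to type.  For a lane load or store
   (MEM_MODE != REG_MODE) only NVECTORS elements are accessed, one per
   vector in the block.  */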
23983 /* Expand a Neon builtin. */
23984 static rtx
23985 arm_expand_neon_args (rtx target, int icode, int have_retval,
23986 neon_builtin_type_mode type_mode,
23987 tree exp, int fcode, ...)
23989 va_list ap;
23990 rtx pat;
23991 tree arg[NEON_MAX_BUILTIN_ARGS];
23992 rtx op[NEON_MAX_BUILTIN_ARGS];
23993 tree arg_type;
23994 tree formals;
23995 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23996 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
23997 enum machine_mode other_mode;
23998 int argc = 0;
23999 int opno;
24001 if (have_retval
24002 && (!target
24003 || GET_MODE (target) != tmode
24004 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24005 target = gen_reg_rtx (tmode);
24007 va_start (ap, fcode);
24009 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24011 for (;;)
24013 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24015 if (thisarg == NEON_ARG_STOP)
24016 break;
24017 else
24019 opno = argc + have_retval;
24020 mode[argc] = insn_data[icode].operand[opno].mode;
24021 arg[argc] = CALL_EXPR_ARG (exp, argc);
24022 arg_type = TREE_VALUE (formals);
24023 if (thisarg == NEON_ARG_MEMORY)
24025 other_mode = insn_data[icode].operand[1 - opno].mode;
24026 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24027 mode[argc], other_mode,
24028 type_mode);
24031 op[argc] = expand_normal (arg[argc]);
24033 switch (thisarg)
24035 case NEON_ARG_COPY_TO_REG:
24036 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24037 if (!(*insn_data[icode].operand[opno].predicate)
24038 (op[argc], mode[argc]))
24039 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24040 break;
24042 case NEON_ARG_CONSTANT:
24043 /* FIXME: This error message is somewhat unhelpful. */
24044 if (!(*insn_data[icode].operand[opno].predicate)
24045 (op[argc], mode[argc]))
24046 error ("argument must be a constant");
24047 break;
24049 case NEON_ARG_MEMORY:
24050 gcc_assert (MEM_P (op[argc]));
24051 PUT_MODE (op[argc], mode[argc]);
24052 /* ??? arm_neon.h uses the same built-in functions for signed
24053 and unsigned accesses, casting where necessary. This isn't
24054 alias safe. */
24055 set_mem_alias_set (op[argc], 0);
24056 if (!(*insn_data[icode].operand[opno].predicate)
24057 (op[argc], mode[argc]))
24058 op[argc] = (replace_equiv_address
24059 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24060 break;
24062 case NEON_ARG_STOP:
24063 gcc_unreachable ();
24066 argc++;
24067 formals = TREE_CHAIN (formals);
24071 va_end (ap);
24073 if (have_retval)
24074 switch (argc)
24076 case 1:
24077 pat = GEN_FCN (icode) (target, op[0]);
24078 break;
24080 case 2:
24081 pat = GEN_FCN (icode) (target, op[0], op[1]);
24082 break;
24084 case 3:
24085 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24086 break;
24088 case 4:
24089 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24090 break;
24092 case 5:
24093 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24094 break;
24096 default:
24097 gcc_unreachable ();
24099 else
24100 switch (argc)
24102 case 1:
24103 pat = GEN_FCN (icode) (op[0]);
24104 break;
24106 case 2:
24107 pat = GEN_FCN (icode) (op[0], op[1]);
24108 break;
24110 case 3:
24111 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24112 break;
24114 case 4:
24115 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24116 break;
24118 case 5:
24119 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24120 break;
24122 default:
24123 gcc_unreachable ();
24126 if (!pat)
24127 return 0;
24129 emit_insn (pat);
24131 return target;
24134 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24135 constants defined per-instruction or per instruction-variant. Instead, the
24136 required info is looked up in the table neon_builtin_data. */
24137 static rtx
24138 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24140 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24141 neon_itype itype = d->itype;
24142 enum insn_code icode = d->code;
24143 neon_builtin_type_mode type_mode = d->mode;
24145 switch (itype)
24147 case NEON_UNOP:
24148 case NEON_CONVERT:
24149 case NEON_DUPLANE:
24150 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24151 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24153 case NEON_BINOP:
24154 case NEON_SETLANE:
24155 case NEON_SCALARMUL:
24156 case NEON_SCALARMULL:
24157 case NEON_SCALARMULH:
24158 case NEON_SHIFTINSERT:
24159 case NEON_LOGICBINOP:
24160 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24161 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24162 NEON_ARG_STOP);
24164 case NEON_TERNOP:
24165 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24166 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24167 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24169 case NEON_GETLANE:
24170 case NEON_FIXCONV:
24171 case NEON_SHIFTIMM:
24172 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24173 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24174 NEON_ARG_STOP);
24176 case NEON_CREATE:
24177 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24178 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24180 case NEON_DUP:
24181 case NEON_RINT:
24182 case NEON_SPLIT:
24183 case NEON_FLOAT_WIDEN:
24184 case NEON_FLOAT_NARROW:
24185 case NEON_REINTERP:
24186 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24187 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24189 case NEON_COMBINE:
24190 case NEON_VTBL:
24191 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24192 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24194 case NEON_RESULTPAIR:
24195 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24196 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24197 NEON_ARG_STOP);
24199 case NEON_LANEMUL:
24200 case NEON_LANEMULL:
24201 case NEON_LANEMULH:
24202 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24203 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24204 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24206 case NEON_LANEMAC:
24207 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24208 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24209 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24211 case NEON_SHIFTACC:
24212 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24213 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24214 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24216 case NEON_SCALARMAC:
24217 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24218 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24219 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24221 case NEON_SELECT:
24222 case NEON_VTBX:
24223 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24224 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24225 NEON_ARG_STOP);
24227 case NEON_LOAD1:
24228 case NEON_LOADSTRUCT:
24229 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24230 NEON_ARG_MEMORY, NEON_ARG_STOP);
24232 case NEON_LOAD1LANE:
24233 case NEON_LOADSTRUCTLANE:
24234 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24235 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24236 NEON_ARG_STOP);
24238 case NEON_STORE1:
24239 case NEON_STORESTRUCT:
24240 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24241 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24243 case NEON_STORE1LANE:
24244 case NEON_STORESTRUCTLANE:
24245 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24246 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24247 NEON_ARG_STOP);
24250 gcc_unreachable ();
24253 /* Emit code to reinterpret one Neon type as another, without altering bits. */
24254 void
24255 neon_reinterpret (rtx dest, rtx src)
24257 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
24260 /* Emit code to place a Neon pair result in memory locations (with equal
24261 registers). */
24262 void
24263 neon_emit_pair_result_insn (enum machine_mode mode,
24264 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
24265 rtx op1, rtx op2)
24267 rtx mem = gen_rtx_MEM (mode, destaddr);
24268 rtx tmp1 = gen_reg_rtx (mode);
24269 rtx tmp2 = gen_reg_rtx (mode);
24271 emit_insn (intfn (tmp1, op1, op2, tmp2));
24273 emit_move_insn (mem, tmp1);
24274 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
24275 emit_move_insn (mem, tmp2);
24278 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24279 not to early-clobber SRC registers in the process.
24281 We assume that the operands described by SRC and DEST represent a
24282 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24283 number of components into which the copy has been decomposed. */
24284 void
24285 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24287 unsigned int i;
24289 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24290 || REGNO (operands[0]) < REGNO (operands[1]))
24292 for (i = 0; i < count; i++)
24294 operands[2 * i] = dest[i];
24295 operands[2 * i + 1] = src[i];
24298 else
24300 for (i = 0; i < count; i++)
24302 operands[2 * i] = dest[count - i - 1];
24303 operands[2 * i + 1] = src[count - i - 1];
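/* Example (illustrative only): for a two-component copy where the
   destination overlaps the source and starts at a higher register,
   say {d1,d2} := {d0,d1}, copying in ascending order would clobber d1
   before it is read, so the second loop emits the moves in reverse
   (d2 := d1, then d1 := d0).  When there is no overlap, or the
   destination starts below the source, ascending order is safe.  */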
24308 /* Split operands into moves from op[1] + op[2] into op[0]. */
24310 void
24311 neon_split_vcombine (rtx operands[3])
24313 unsigned int dest = REGNO (operands[0]);
24314 unsigned int src1 = REGNO (operands[1]);
24315 unsigned int src2 = REGNO (operands[2]);
24316 enum machine_mode halfmode = GET_MODE (operands[1]);
24317 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
24318 rtx destlo, desthi;
24320 if (src1 == dest && src2 == dest + halfregs)
24322 /* No-op move. Can't split to nothing; emit something. */
24323 emit_note (NOTE_INSN_DELETED);
24324 return;
24327 /* Preserve register attributes for variable tracking. */
24328 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24329 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24330 GET_MODE_SIZE (halfmode));
24332 /* Special case of reversed high/low parts. Use VSWP. */
24333 if (src2 == dest && src1 == dest + halfregs)
24335 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
24336 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
24337 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24338 return;
24341 if (!reg_overlap_mentioned_p (operands[2], destlo))
24343 /* Try to avoid unnecessary moves if part of the result
24344 is in the right place already. */
24345 if (src1 != dest)
24346 emit_move_insn (destlo, operands[1]);
24347 if (src2 != dest + halfregs)
24348 emit_move_insn (desthi, operands[2]);
24350 else
24352 if (src2 != dest + halfregs)
24353 emit_move_insn (desthi, operands[2]);
24354 if (src1 != dest)
24355 emit_move_insn (destlo, operands[1]);
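/* Example (illustrative only): splitting q0 = vcombine (d1, d2) yields
   the two D-register moves d0 := d1 followed by d1 := d2, with the
   overlap check above ordering them so that a half still needed as a
   source is read before it is overwritten.  The degenerate cases are
   handled separately: halves already in place become a deleted-insn
   note, and fully swapped halves (q0 = vcombine (d1, d0)) become a
   single VSWP.  */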
24359 /* Expand an expression EXP that calls a built-in function,
24360 with result going to TARGET if that's convenient
24361 (and in mode MODE if that's convenient).
24362 SUBTARGET may be used as the target for computing one of EXP's operands.
24363 IGNORE is nonzero if the value is to be ignored. */
24365 static rtx
24366 arm_expand_builtin (tree exp,
24367 rtx target,
24368 rtx subtarget ATTRIBUTE_UNUSED,
24369 enum machine_mode mode ATTRIBUTE_UNUSED,
24370 int ignore ATTRIBUTE_UNUSED)
24372 const struct builtin_description * d;
24373 enum insn_code icode;
24374 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24375 tree arg0;
24376 tree arg1;
24377 tree arg2;
24378 rtx op0;
24379 rtx op1;
24380 rtx op2;
24381 rtx pat;
24382 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24383 size_t i;
24384 enum machine_mode tmode;
24385 enum machine_mode mode0;
24386 enum machine_mode mode1;
24387 enum machine_mode mode2;
24388 int opint;
24389 int selector;
24390 int mask;
24391 int imm;
24393 if (fcode >= ARM_BUILTIN_NEON_BASE)
24394 return arm_expand_neon_builtin (fcode, exp, target);
24396 switch (fcode)
24398 case ARM_BUILTIN_TEXTRMSB:
24399 case ARM_BUILTIN_TEXTRMUB:
24400 case ARM_BUILTIN_TEXTRMSH:
24401 case ARM_BUILTIN_TEXTRMUH:
24402 case ARM_BUILTIN_TEXTRMSW:
24403 case ARM_BUILTIN_TEXTRMUW:
24404 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
24405 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
24406 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
24407 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
24408 : CODE_FOR_iwmmxt_textrmw);
24410 arg0 = CALL_EXPR_ARG (exp, 0);
24411 arg1 = CALL_EXPR_ARG (exp, 1);
24412 op0 = expand_normal (arg0);
24413 op1 = expand_normal (arg1);
24414 tmode = insn_data[icode].operand[0].mode;
24415 mode0 = insn_data[icode].operand[1].mode;
24416 mode1 = insn_data[icode].operand[2].mode;
24418 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24419 op0 = copy_to_mode_reg (mode0, op0);
24420 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24422 /* @@@ better error message */
24423 error ("selector must be an immediate");
24424 return gen_reg_rtx (tmode);
24427 opint = INTVAL (op1);
24428 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
24430 if (opint > 7 || opint < 0)
24431 error ("the selector must be in the range 0 to 7");
24433 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
24435 if (opint > 3 || opint < 0)
24436 error ("the selector must be in the range 0 to 3");
24438 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
24440 if (opint > 1 || opint < 0)
24441 error ("the selector must be in the range 0 to 1");
24444 if (target == 0
24445 || GET_MODE (target) != tmode
24446 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24447 target = gen_reg_rtx (tmode);
24448 pat = GEN_FCN (icode) (target, op0, op1);
24449 if (! pat)
24450 return 0;
24451 emit_insn (pat);
24452 return target;
24454 case ARM_BUILTIN_WALIGNI:
24455 /* If op2 is an immediate, expand to waligni, else to walignr.  */
24456 arg0 = CALL_EXPR_ARG (exp, 0);
24457 arg1 = CALL_EXPR_ARG (exp, 1);
24458 arg2 = CALL_EXPR_ARG (exp, 2);
24459 op0 = expand_normal (arg0);
24460 op1 = expand_normal (arg1);
24461 op2 = expand_normal (arg2);
24462 if (CONST_INT_P (op2))
24464 icode = CODE_FOR_iwmmxt_waligni;
24465 tmode = insn_data[icode].operand[0].mode;
24466 mode0 = insn_data[icode].operand[1].mode;
24467 mode1 = insn_data[icode].operand[2].mode;
24468 mode2 = insn_data[icode].operand[3].mode;
24469 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24470 op0 = copy_to_mode_reg (mode0, op0);
24471 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24472 op1 = copy_to_mode_reg (mode1, op1);
24473 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
24474 selector = INTVAL (op2);
24475 if (selector > 7 || selector < 0)
24476 error ("the selector must be in the range 0 to 7");
24478 else
24480 icode = CODE_FOR_iwmmxt_walignr;
24481 tmode = insn_data[icode].operand[0].mode;
24482 mode0 = insn_data[icode].operand[1].mode;
24483 mode1 = insn_data[icode].operand[2].mode;
24484 mode2 = insn_data[icode].operand[3].mode;
24485 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24486 op0 = copy_to_mode_reg (mode0, op0);
24487 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24488 op1 = copy_to_mode_reg (mode1, op1);
24489 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
24490 op2 = copy_to_mode_reg (mode2, op2);
24492 if (target == 0
24493 || GET_MODE (target) != tmode
24494 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
24495 target = gen_reg_rtx (tmode);
24496 pat = GEN_FCN (icode) (target, op0, op1, op2);
24497 if (!pat)
24498 return 0;
24499 emit_insn (pat);
24500 return target;
24502 case ARM_BUILTIN_TINSRB:
24503 case ARM_BUILTIN_TINSRH:
24504 case ARM_BUILTIN_TINSRW:
24505 case ARM_BUILTIN_WMERGE:
24506 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
24507 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
24508 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
24509 : CODE_FOR_iwmmxt_tinsrw);
24510 arg0 = CALL_EXPR_ARG (exp, 0);
24511 arg1 = CALL_EXPR_ARG (exp, 1);
24512 arg2 = CALL_EXPR_ARG (exp, 2);
24513 op0 = expand_normal (arg0);
24514 op1 = expand_normal (arg1);
24515 op2 = expand_normal (arg2);
24516 tmode = insn_data[icode].operand[0].mode;
24517 mode0 = insn_data[icode].operand[1].mode;
24518 mode1 = insn_data[icode].operand[2].mode;
24519 mode2 = insn_data[icode].operand[3].mode;
24521 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24522 op0 = copy_to_mode_reg (mode0, op0);
24523 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24524 op1 = copy_to_mode_reg (mode1, op1);
24525 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24527 error ("selector must be an immediate");
24528 return const0_rtx;
24530 if (icode == CODE_FOR_iwmmxt_wmerge)
24532 selector = INTVAL (op2);
24533 if (selector > 7 || selector < 0)
24534 error ("the selector must be in the range 0 to 7");
24536 if ((icode == CODE_FOR_iwmmxt_tinsrb)
24537 || (icode == CODE_FOR_iwmmxt_tinsrh)
24538 || (icode == CODE_FOR_iwmmxt_tinsrw))
24540 mask = 0x01;
24541 selector = INTVAL (op2);
24542 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
24543 error ("the selector must be in the range 0 to 7");
24544 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
24545 error ("the selector must be in the range 0 to 3");
24546 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
24547 error ("the selector must be in the range 0 to 1");
24548 mask <<= selector;
24549 op2 = GEN_INT (mask);
24551 if (target == 0
24552 || GET_MODE (target) != tmode
24553 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24554 target = gen_reg_rtx (tmode);
24555 pat = GEN_FCN (icode) (target, op0, op1, op2);
24556 if (! pat)
24557 return 0;
24558 emit_insn (pat);
24559 return target;
24561 case ARM_BUILTIN_SETWCGR0:
24562 case ARM_BUILTIN_SETWCGR1:
24563 case ARM_BUILTIN_SETWCGR2:
24564 case ARM_BUILTIN_SETWCGR3:
24565 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
24566 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
24567 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
24568 : CODE_FOR_iwmmxt_setwcgr3);
24569 arg0 = CALL_EXPR_ARG (exp, 0);
24570 op0 = expand_normal (arg0);
24571 mode0 = insn_data[icode].operand[0].mode;
24572 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
24573 op0 = copy_to_mode_reg (mode0, op0);
24574 pat = GEN_FCN (icode) (op0);
24575 if (!pat)
24576 return 0;
24577 emit_insn (pat);
24578 return 0;
24580 case ARM_BUILTIN_GETWCGR0:
24581 case ARM_BUILTIN_GETWCGR1:
24582 case ARM_BUILTIN_GETWCGR2:
24583 case ARM_BUILTIN_GETWCGR3:
24584 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
24585 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
24586 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
24587 : CODE_FOR_iwmmxt_getwcgr3);
24588 tmode = insn_data[icode].operand[0].mode;
24589 if (target == 0
24590 || GET_MODE (target) != tmode
24591 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
24592 target = gen_reg_rtx (tmode);
24593 pat = GEN_FCN (icode) (target);
24594 if (!pat)
24595 return 0;
24596 emit_insn (pat);
24597 return target;
24599 case ARM_BUILTIN_WSHUFH:
24600 icode = CODE_FOR_iwmmxt_wshufh;
24601 arg0 = CALL_EXPR_ARG (exp, 0);
24602 arg1 = CALL_EXPR_ARG (exp, 1);
24603 op0 = expand_normal (arg0);
24604 op1 = expand_normal (arg1);
24605 tmode = insn_data[icode].operand[0].mode;
24606 mode1 = insn_data[icode].operand[1].mode;
24607 mode2 = insn_data[icode].operand[2].mode;
24609 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
24610 op0 = copy_to_mode_reg (mode1, op0);
24611 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
24613 error ("mask must be an immediate");
24614 return const0_rtx;
24616 selector = INTVAL (op1);
24617 if (selector < 0 || selector > 255)
24618 error ("the mask must be in the range 0 to 255");
24619 if (target == 0
24620 || GET_MODE (target) != tmode
24621 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24622 target = gen_reg_rtx (tmode);
24623 pat = GEN_FCN (icode) (target, op0, op1);
24624 if (! pat)
24625 return 0;
24626 emit_insn (pat);
24627 return target;
24629 case ARM_BUILTIN_WMADDS:
24630 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
24631 case ARM_BUILTIN_WMADDSX:
24632 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
24633 case ARM_BUILTIN_WMADDSN:
24634 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
24635 case ARM_BUILTIN_WMADDU:
24636 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
24637 case ARM_BUILTIN_WMADDUX:
24638 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
24639 case ARM_BUILTIN_WMADDUN:
24640 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
24641 case ARM_BUILTIN_WSADBZ:
24642 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
24643 case ARM_BUILTIN_WSADHZ:
24644 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
24646 /* Several three-argument builtins. */
24647 case ARM_BUILTIN_WMACS:
24648 case ARM_BUILTIN_WMACU:
24649 case ARM_BUILTIN_TMIA:
24650 case ARM_BUILTIN_TMIAPH:
24651 case ARM_BUILTIN_TMIATT:
24652 case ARM_BUILTIN_TMIATB:
24653 case ARM_BUILTIN_TMIABT:
24654 case ARM_BUILTIN_TMIABB:
24655 case ARM_BUILTIN_WQMIABB:
24656 case ARM_BUILTIN_WQMIABT:
24657 case ARM_BUILTIN_WQMIATB:
24658 case ARM_BUILTIN_WQMIATT:
24659 case ARM_BUILTIN_WQMIABBN:
24660 case ARM_BUILTIN_WQMIABTN:
24661 case ARM_BUILTIN_WQMIATBN:
24662 case ARM_BUILTIN_WQMIATTN:
24663 case ARM_BUILTIN_WMIABB:
24664 case ARM_BUILTIN_WMIABT:
24665 case ARM_BUILTIN_WMIATB:
24666 case ARM_BUILTIN_WMIATT:
24667 case ARM_BUILTIN_WMIABBN:
24668 case ARM_BUILTIN_WMIABTN:
24669 case ARM_BUILTIN_WMIATBN:
24670 case ARM_BUILTIN_WMIATTN:
24671 case ARM_BUILTIN_WMIAWBB:
24672 case ARM_BUILTIN_WMIAWBT:
24673 case ARM_BUILTIN_WMIAWTB:
24674 case ARM_BUILTIN_WMIAWTT:
24675 case ARM_BUILTIN_WMIAWBBN:
24676 case ARM_BUILTIN_WMIAWBTN:
24677 case ARM_BUILTIN_WMIAWTBN:
24678 case ARM_BUILTIN_WMIAWTTN:
24679 case ARM_BUILTIN_WSADB:
24680 case ARM_BUILTIN_WSADH:
24681 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
24682 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
24683 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
24684 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
24685 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
24686 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
24687 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
24688 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
24689 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
24690 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
24691 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
24692 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
24693 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
24694 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
24695 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
24696 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
24697 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
24698 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
24699 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
24700 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
24701 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
24702 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
24703 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
24704 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
24705 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
24706 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
24707 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
24708 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
24709 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
24710 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
24711 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
24712 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
24713 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
24714 : CODE_FOR_iwmmxt_wsadh);
24715 arg0 = CALL_EXPR_ARG (exp, 0);
24716 arg1 = CALL_EXPR_ARG (exp, 1);
24717 arg2 = CALL_EXPR_ARG (exp, 2);
24718 op0 = expand_normal (arg0);
24719 op1 = expand_normal (arg1);
24720 op2 = expand_normal (arg2);
24721 tmode = insn_data[icode].operand[0].mode;
24722 mode0 = insn_data[icode].operand[1].mode;
24723 mode1 = insn_data[icode].operand[2].mode;
24724 mode2 = insn_data[icode].operand[3].mode;
24726 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24727 op0 = copy_to_mode_reg (mode0, op0);
24728 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24729 op1 = copy_to_mode_reg (mode1, op1);
24730 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24731 op2 = copy_to_mode_reg (mode2, op2);
24732 if (target == 0
24733 || GET_MODE (target) != tmode
24734 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24735 target = gen_reg_rtx (tmode);
24736 pat = GEN_FCN (icode) (target, op0, op1, op2);
24737 if (! pat)
24738 return 0;
24739 emit_insn (pat);
24740 return target;
24742 case ARM_BUILTIN_WZERO:
24743 target = gen_reg_rtx (DImode);
24744 emit_insn (gen_iwmmxt_clrdi (target));
24745 return target;
24747 case ARM_BUILTIN_WSRLHI:
24748 case ARM_BUILTIN_WSRLWI:
24749 case ARM_BUILTIN_WSRLDI:
24750 case ARM_BUILTIN_WSLLHI:
24751 case ARM_BUILTIN_WSLLWI:
24752 case ARM_BUILTIN_WSLLDI:
24753 case ARM_BUILTIN_WSRAHI:
24754 case ARM_BUILTIN_WSRAWI:
24755 case ARM_BUILTIN_WSRADI:
24756 case ARM_BUILTIN_WRORHI:
24757 case ARM_BUILTIN_WRORWI:
24758 case ARM_BUILTIN_WRORDI:
24759 case ARM_BUILTIN_WSRLH:
24760 case ARM_BUILTIN_WSRLW:
24761 case ARM_BUILTIN_WSRLD:
24762 case ARM_BUILTIN_WSLLH:
24763 case ARM_BUILTIN_WSLLW:
24764 case ARM_BUILTIN_WSLLD:
24765 case ARM_BUILTIN_WSRAH:
24766 case ARM_BUILTIN_WSRAW:
24767 case ARM_BUILTIN_WSRAD:
24768 case ARM_BUILTIN_WRORH:
24769 case ARM_BUILTIN_WRORW:
24770 case ARM_BUILTIN_WRORD:
24771 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
24772 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
24773 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
24774 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
24775 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
24776 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
24777 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
24778 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
24779 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
24780 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
24781 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
24782 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
24783 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
24784 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
24785 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
24786 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
24787 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
24788 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
24789 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
24790 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
24791 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
24792 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
24793 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
24794 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
24795 : CODE_FOR_nothing);
24796 arg1 = CALL_EXPR_ARG (exp, 1);
24797 op1 = expand_normal (arg1);
24798 if (GET_MODE (op1) == VOIDmode)
24800 imm = INTVAL (op1);
24801 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
24802 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
24803 && (imm < 0 || imm > 32))
24805 if (fcode == ARM_BUILTIN_WRORHI)
24806 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16");
24807 else if (fcode == ARM_BUILTIN_WRORWI)
24808 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32");
24809 else if (fcode == ARM_BUILTIN_WRORH)
24810 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16");
24811 else
24812 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32");
24814 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
24815 && (imm < 0 || imm > 64))
24817 if (fcode == ARM_BUILTIN_WRORDI)
24818 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64");
24819 else
24820 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64");
24822 else if (imm < 0)
24824 if (fcode == ARM_BUILTIN_WSRLHI)
24825 error ("the count must not be negative; please check the intrinsic _mm_srli_pi16");
24826 else if (fcode == ARM_BUILTIN_WSRLWI)
24827 error ("the count must not be negative; please check the intrinsic _mm_srli_pi32");
24828 else if (fcode == ARM_BUILTIN_WSRLDI)
24829 error ("the count must not be negative; please check the intrinsic _mm_srli_si64");
24830 else if (fcode == ARM_BUILTIN_WSLLHI)
24831 error ("the count must not be negative; please check the intrinsic _mm_slli_pi16");
24832 else if (fcode == ARM_BUILTIN_WSLLWI)
24833 error ("the count must not be negative; please check the intrinsic _mm_slli_pi32");
24834 else if (fcode == ARM_BUILTIN_WSLLDI)
24835 error ("the count must not be negative; please check the intrinsic _mm_slli_si64");
24836 else if (fcode == ARM_BUILTIN_WSRAHI)
24837 error ("the count must not be negative; please check the intrinsic _mm_srai_pi16");
24838 else if (fcode == ARM_BUILTIN_WSRAWI)
24839 error ("the count must not be negative; please check the intrinsic _mm_srai_pi32");
24840 else if (fcode == ARM_BUILTIN_WSRADI)
24841 error ("the count must not be negative; please check the intrinsic _mm_srai_si64");
24842 else if (fcode == ARM_BUILTIN_WSRLH)
24843 error ("the count must not be negative; please check the intrinsic _mm_srl_pi16");
24844 else if (fcode == ARM_BUILTIN_WSRLW)
24845 error ("the count must not be negative; please check the intrinsic _mm_srl_pi32");
24846 else if (fcode == ARM_BUILTIN_WSRLD)
24847 error ("the count must not be negative; please check the intrinsic _mm_srl_si64");
24848 else if (fcode == ARM_BUILTIN_WSLLH)
24849 error ("the count must not be negative; please check the intrinsic _mm_sll_pi16");
24850 else if (fcode == ARM_BUILTIN_WSLLW)
24851 error ("the count must not be negative; please check the intrinsic _mm_sll_pi32");
24852 else if (fcode == ARM_BUILTIN_WSLLD)
24853 error ("the count must not be negative; please check the intrinsic _mm_sll_si64");
24854 else if (fcode == ARM_BUILTIN_WSRAH)
24855 error ("the count must not be negative; please check the intrinsic _mm_sra_pi16");
24856 else if (fcode == ARM_BUILTIN_WSRAW)
24857 error ("the count must not be negative; please check the intrinsic _mm_sra_pi32");
24858 else
24859 error ("the count must not be negative; please check the intrinsic _mm_sra_si64");
24862 return arm_expand_binop_builtin (icode, exp, target);
24864 default:
24865 break;
24868 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24869 if (d->code == (const enum arm_builtins) fcode)
24870 return arm_expand_binop_builtin (d->icode, exp, target);
24872 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
24873 if (d->code == (const enum arm_builtins) fcode)
24874 return arm_expand_unop_builtin (d->icode, exp, target, 0);
24876 /* @@@ Should really do something sensible here. */
24877 return NULL_RTX;
24880 /* Return the number (counting from 0) of
24881 the least significant set bit in MASK. */
24883 inline static int
24884 number_of_first_bit_set (unsigned mask)
24886 return ctz_hwi (mask);
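/* For example, number_of_first_bit_set (0x28) == 3, since the mask
   0b101000 has bit 3 as its least significant set bit.  */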
24889 /* Like emit_multi_reg_push, but allowing for a different set of
24890 registers to be described as saved. MASK is the set of registers
24891 to be saved; REAL_REGS is the set of registers to be described as
24892 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24894 static rtx
24895 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24897 unsigned long regno;
24898 rtx par[10], tmp, reg, insn;
24899 int i, j;
24901 /* Build the parallel of the registers actually being stored. */
24902 for (i = 0; mask; ++i, mask &= mask - 1)
24904 regno = ctz_hwi (mask);
24905 reg = gen_rtx_REG (SImode, regno);
24907 if (i == 0)
24908 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24909 else
24910 tmp = gen_rtx_USE (VOIDmode, reg);
24912 par[i] = tmp;
24915 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24916 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24917 tmp = gen_frame_mem (BLKmode, tmp);
24918 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
24919 par[0] = tmp;
24921 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24922 insn = emit_insn (tmp);
24924 /* Always build the stack adjustment note for unwind info. */
24925 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24926 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
24927 par[0] = tmp;
24929 /* Build the parallel of the registers recorded as saved for unwind. */
24930 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24932 regno = ctz_hwi (real_regs);
24933 reg = gen_rtx_REG (SImode, regno);
24935 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24936 tmp = gen_frame_mem (SImode, tmp);
24937 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
24938 RTX_FRAME_RELATED_P (tmp) = 1;
24939 par[j + 1] = tmp;
24942 if (j == 0)
24943 tmp = par[0];
24944 else
24946 RTX_FRAME_RELATED_P (par[0]) = 1;
24947 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24950 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24952 return insn;
24955 /* Emit code to push or pop registers to or from the stack. F is the
24956 assembly file. MASK is the registers to pop. */
24957 static void
24958 thumb_pop (FILE *f, unsigned long mask)
24960 int regno;
24961 int lo_mask = mask & 0xFF;
24962 int pushed_words = 0;
24964 gcc_assert (mask);
24966 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24968 /* Special case. Do not generate a POP PC statement here, do it in
24969 thumb_exit() */
24970 thumb_exit (f, -1);
24971 return;
24974 fprintf (f, "\tpop\t{");
24976 /* Look at the low registers first. */
24977 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24979 if (lo_mask & 1)
24981 asm_fprintf (f, "%r", regno);
24983 if ((lo_mask & ~1) != 0)
24984 fprintf (f, ", ");
24986 pushed_words++;
24990 if (mask & (1 << PC_REGNUM))
24992 /* Catch popping the PC. */
24993 if (TARGET_INTERWORK || TARGET_BACKTRACE
24994 || crtl->calls_eh_return)
24996 /* The PC is never popped directly, instead
24997 it is popped into r3 and then BX is used. */
24998 fprintf (f, "}\n");
25000 thumb_exit (f, -1);
25002 return;
25004 else
25006 if (mask & 0xFF)
25007 fprintf (f, ", ");
25009 asm_fprintf (f, "%r", PC_REGNUM);
25013 fprintf (f, "}\n");
25016 /* Generate code to return from a thumb function.
25017 If 'reg_containing_return_addr' is -1, then the return address is
25018 actually on the stack, at the stack pointer. */
25019 static void
25020 thumb_exit (FILE *f, int reg_containing_return_addr)
25022 unsigned regs_available_for_popping;
25023 unsigned regs_to_pop;
25024 int pops_needed;
25025 unsigned available;
25026 unsigned required;
25027 int mode;
25028 int size;
25029 int restore_a4 = FALSE;
25031 /* Compute the registers we need to pop. */
25032 regs_to_pop = 0;
25033 pops_needed = 0;
25035 if (reg_containing_return_addr == -1)
25037 regs_to_pop |= 1 << LR_REGNUM;
25038 ++pops_needed;
25041 if (TARGET_BACKTRACE)
25043 /* Restore the (ARM) frame pointer and stack pointer. */
25044 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25045 pops_needed += 2;
25048 /* If there is nothing to pop then just emit the BX instruction and
25049 return. */
25050 if (pops_needed == 0)
25052 if (crtl->calls_eh_return)
25053 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25055 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25056 return;
25058 /* Otherwise if we are not supporting interworking and we have not created
25059 a backtrace structure and the function was not entered in ARM mode then
25060 just pop the return address straight into the PC. */
25061 else if (!TARGET_INTERWORK
25062 && !TARGET_BACKTRACE
25063 && !is_called_in_ARM_mode (current_function_decl)
25064 && !crtl->calls_eh_return)
25066 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25067 return;
25070 /* Find out how many of the (return) argument registers we can corrupt. */
25071 regs_available_for_popping = 0;
25073 /* If returning via __builtin_eh_return, the bottom three registers
25074 all contain information needed for the return. */
25075 if (crtl->calls_eh_return)
25076 size = 12;
25077 else
25079 /* We can deduce the registers used from the function's
25080 return value. This is more reliable than examining
25081 df_regs_ever_live_p () because that will be set if the register is
25082 ever used in the function, not just if the register is used
25083 to hold a return value. */
25085 if (crtl->return_rtx != 0)
25086 mode = GET_MODE (crtl->return_rtx);
25087 else
25088 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25090 size = GET_MODE_SIZE (mode);
25092 if (size == 0)
25094 /* In a void function we can use any argument register.
25095 In a function that returns a structure on the stack
25096 we can use the second and third argument registers. */
25097 if (mode == VOIDmode)
25098 regs_available_for_popping =
25099 (1 << ARG_REGISTER (1))
25100 | (1 << ARG_REGISTER (2))
25101 | (1 << ARG_REGISTER (3));
25102 else
25103 regs_available_for_popping =
25104 (1 << ARG_REGISTER (2))
25105 | (1 << ARG_REGISTER (3));
25107 else if (size <= 4)
25108 regs_available_for_popping =
25109 (1 << ARG_REGISTER (2))
25110 | (1 << ARG_REGISTER (3));
25111 else if (size <= 8)
25112 regs_available_for_popping =
25113 (1 << ARG_REGISTER (3));
25116 /* Match registers to be popped with registers into which we pop them. */
25117 for (available = regs_available_for_popping,
25118 required = regs_to_pop;
25119 required != 0 && available != 0;
25120 available &= ~(available & - available),
25121 required &= ~(required & - required))
25122 -- pops_needed;
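  /* Illustrative note (not from the original source): x & -x isolates the
     lowest set bit of x, so each iteration of the loop above clears one
     register from each mask and counts one matched pop.  E.g. with
     regs_available_for_popping == 0x6 (r1, r2) and regs_to_pop == 0x7
     (r0, r1, r2) the loop runs twice, leaving one pop still needed.  */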
25124 /* If we have any popping registers left over, remove them. */
25125 if (available > 0)
25126 regs_available_for_popping &= ~available;
25128 /* Otherwise if we need another popping register we can use
25129 the fourth argument register. */
25130 else if (pops_needed)
25132 /* If we have not found any free argument registers and
25133 reg a4 contains the return address, we must move it. */
25134 if (regs_available_for_popping == 0
25135 && reg_containing_return_addr == LAST_ARG_REGNUM)
25137 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25138 reg_containing_return_addr = LR_REGNUM;
25140 else if (size > 12)
25142 /* Register a4 is being used to hold part of the return value,
25143 but we have dire need of a free, low register. */
25144 restore_a4 = TRUE;
25146 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25149 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25151 /* The fourth argument register is available. */
25152 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25154 --pops_needed;
25158 /* Pop as many registers as we can. */
25159 thumb_pop (f, regs_available_for_popping);
25161 /* Process the registers we popped. */
25162 if (reg_containing_return_addr == -1)
25164 /* The return address was popped into the lowest numbered register. */
25165 regs_to_pop &= ~(1 << LR_REGNUM);
25167 reg_containing_return_addr =
25168 number_of_first_bit_set (regs_available_for_popping);
25170 /* Remove this register from the mask of available registers, so that
25171 the return address will not be corrupted by further pops. */
25172 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25175 /* If we popped other registers then handle them here. */
25176 if (regs_available_for_popping)
25178 int frame_pointer;
25180 /* Work out which register currently contains the frame pointer. */
25181 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25183 /* Move it into the correct place. */
25184 asm_fprintf (f, "\tmov\t%r, %r\n",
25185 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25187 /* (Temporarily) remove it from the mask of popped registers. */
25188 regs_available_for_popping &= ~(1 << frame_pointer);
25189 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25191 if (regs_available_for_popping)
25193 int stack_pointer;
25195 /* We popped the stack pointer as well,
25196 find the register that contains it. */
25197 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25199 /* Move it into the stack register. */
25200 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25202 /* At this point we have popped all necessary registers, so
25203 do not worry about restoring regs_available_for_popping
25204 to its correct value:
25206 assert (pops_needed == 0)
25207 assert (regs_available_for_popping == (1 << frame_pointer))
25208 assert (regs_to_pop == (1 << STACK_POINTER)) */
25210 else
25212 /* Since we have just moved the popped value into the frame
25213 pointer, the popping register is available for reuse, and
25214 we know that we still have the stack pointer left to pop. */
25215 regs_available_for_popping |= (1 << frame_pointer);
25219 /* If we still have registers left on the stack, but we no longer have
25220 any registers into which we can pop them, then we must move the return
25221 address into the link register and make available the register that
25222 contained it. */
25223 if (regs_available_for_popping == 0 && pops_needed > 0)
25225 regs_available_for_popping |= 1 << reg_containing_return_addr;
25227 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
25228 reg_containing_return_addr);
25230 reg_containing_return_addr = LR_REGNUM;
25233 /* If we have registers left on the stack then pop some more.
25234 We know that at most we will want to pop FP and SP. */
25235 if (pops_needed > 0)
25237 int popped_into;
25238 int move_to;
25240 thumb_pop (f, regs_available_for_popping);
25242 /* We have popped either FP or SP.
25243 Move whichever one it is into the correct register. */
25244 popped_into = number_of_first_bit_set (regs_available_for_popping);
25245 move_to = number_of_first_bit_set (regs_to_pop);
25247 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
25249 regs_to_pop &= ~(1 << move_to);
25251 --pops_needed;
25254 /* If we still have not popped everything then we must have only
25255 had one register available to us and we are now popping the SP. */
25256 if (pops_needed > 0)
25258 int popped_into;
25260 thumb_pop (f, regs_available_for_popping);
25262 popped_into = number_of_first_bit_set (regs_available_for_popping);
25264 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
25266 /* assert (regs_to_pop == (1 << STACK_POINTER))
25267 assert (pops_needed == 1) */
25271 /* If necessary restore the a4 register. */
25272 if (restore_a4)
25274 if (reg_containing_return_addr != LR_REGNUM)
25276 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25277 reg_containing_return_addr = LR_REGNUM;
25280 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
25283 if (crtl->calls_eh_return)
25284 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25286 /* Return to caller. */
25287 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25290 /* Scan INSN just before assembler is output for it.
25291 For Thumb-1, we track the status of the condition codes; this
25292 information is used in the cbranchsi4_insn pattern. */
25293 void
25294 thumb1_final_prescan_insn (rtx insn)
25296 if (flag_print_asm_name)
25297 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25298 INSN_ADDRESSES (INSN_UID (insn)));
25299 /* Don't overwrite the previous setter when we get to a cbranch. */
25300 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25302 enum attr_conds conds;
25304 if (cfun->machine->thumb1_cc_insn)
25306 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25307 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25308 CC_STATUS_INIT;
25310 conds = get_attr_conds (insn);
25311 if (conds == CONDS_SET)
25313 rtx set = single_set (insn);
25314 cfun->machine->thumb1_cc_insn = insn;
25315 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25316 cfun->machine->thumb1_cc_op1 = const0_rtx;
25317 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25318 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25320 rtx src1 = XEXP (SET_SRC (set), 1);
25321 if (src1 == const0_rtx)
25322 cfun->machine->thumb1_cc_mode = CCmode;
25324 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25326 /* Record the src register operand instead of dest because
25327 cprop_hardreg pass propagates src. */
25328 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25331 else if (conds != CONDS_NOCOND)
25332 cfun->machine->thumb1_cc_insn = NULL_RTX;
25335 /* Check if unexpected far jump is used. */
25336 if (cfun->machine->lr_save_eliminated
25337 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25338 internal_error("Unexpected thumb1 far jump");
25342 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25344 unsigned HOST_WIDE_INT mask = 0xff;
25345 int i;
25347 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
25348 if (val == 0) /* XXX */
25349 return 0;
25351 for (i = 0; i < 25; i++)
25352 if ((val & (mask << i)) == val)
25353 return 1;
25355 return 0;
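/* Illustrative sketch (not part of the build, names hypothetical):
   thumb_shiftable_const accepts exactly the 32-bit constants whose set bits
   fit in one 8-bit window shifted left by 0..24, i.e. values a Thumb-1
   sequence can build as "move 8-bit immediate, then shift".  */
#if 0
static void
example_thumb_shiftable_const (void)
{
  gcc_assert (thumb_shiftable_const (0x1fe00));  /* 0xff << 9, one window  */
  gcc_assert (!thumb_shiftable_const (0x101));   /* bits 0 and 8 span 9 positions  */
}
#endif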
25358 /* Returns nonzero if the current function contains,
25359 or might contain a far jump. */
25360 static int
25361 thumb_far_jump_used_p (void)
25363 rtx insn;
25364 bool far_jump = false;
25365 unsigned int func_size = 0;
25367 /* This test is only important for leaf functions. */
25368 /* assert (!leaf_function_p ()); */
25370 /* If we have already decided that far jumps may be used,
25371 do not bother checking again, and always return true even if
25372 it turns out that they are not being used. Once we have made
25373 the decision that far jumps are present (and that hence the link
25374 register will be pushed onto the stack) we cannot go back on it. */
25375 if (cfun->machine->far_jump_used)
25376 return 1;
25378 /* If this function is not being called from the prologue/epilogue
25379 generation code then it must be being called from the
25380 INITIAL_ELIMINATION_OFFSET macro. */
25381 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25383 /* In this case we know that we are being asked about the elimination
25384 of the arg pointer register. If that register is not being used,
25385 then there are no arguments on the stack, and we do not have to
25386 worry that a far jump might force the prologue to push the link
25387 register, changing the stack offsets. In this case we can just
25388 return false, since the presence of far jumps in the function will
25389 not affect stack offsets.
25391 If the arg pointer is live (or if it was live, but has now been
25392 eliminated and so set to dead) then we do have to test to see if
25393 the function might contain a far jump. This test can lead to some
25394 false negatives, since before reload is completed, the length of
25395 branch instructions is not known, so gcc defaults to returning their
25396 longest length, which in turn sets the far jump attribute to true.
25398 A false negative will not result in bad code being generated, but it
25399 will result in a needless push and pop of the link register. We
25400 hope that this does not occur too often.
25402 If we need doubleword stack alignment this could affect the other
25403 elimination offsets so we can't risk getting it wrong. */
25404 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25405 cfun->machine->arg_pointer_live = 1;
25406 else if (!cfun->machine->arg_pointer_live)
25407 return 0;
25410 /* Check to see if the function contains a branch
25411 insn with the far jump attribute set. */
25412 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25414 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25416 far_jump = true;
25418 func_size += get_attr_length (insn);
25421 /* The far_jump attribute is always true for thumb1 before the
25422 shorten_branch pass, so checking it before that pass tells us
25423 little.
25425 The following heuristic tries to estimate more accurately whether a far
25426 jump will really be needed. It is deliberately conservative, as there is
25427 no chance to roll back a decision not to use far jumps.
25429 Thumb1 long branch offsets range from -2048 to 2046. In the worst case
25430 each 2-byte insn is associated with a 4-byte constant pool entry, so
25431 using function size 2048/3 as the threshold is conservative enough. */
25432 if (far_jump)
25434 if ((func_size * 3) >= 2048)
25436 /* Record the fact that we have decided that
25437 the function does use far jumps. */
25438 cfun->machine->far_jump_used = 1;
25439 return 1;
25443 return 0;
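/* Illustrative sketch (not part of the build, names hypothetical): the
   conservative far-jump threshold used above, spelled out.  FUNC_SIZE sums
   the optimistic 2-byte encodings; each such insn may drag in a 4-byte
   literal-pool entry, so the worst-case span is FUNC_SIZE * (2 + 4) / 2
   == FUNC_SIZE * 3 bytes, and anything that can span 2048 bytes may be out
   of range of a Thumb-1 conditional branch.  */
#if 0
static bool
example_may_need_far_jump (unsigned int func_size)
{
  unsigned int worst_case_span = func_size * 3;
  return worst_case_span >= 2048;  /* true from func_size == 683 upwards  */
}
#endif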
25446 /* Return nonzero if FUNC must be entered in ARM mode. */
25448 is_called_in_ARM_mode (tree func)
25450 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25452 /* Ignore the problem about functions whose address is taken. */
25453 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25454 return TRUE;
25456 #ifdef ARM_PE
25457 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25458 #else
25459 return FALSE;
25460 #endif
25463 /* Given the stack offsets and register mask in OFFSETS, decide how
25464 many additional registers to push instead of subtracting a constant
25465 from SP. For epilogues the principle is the same except we use pop.
25466 FOR_PROLOGUE indicates which we're generating. */
25467 static int
25468 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25470 HOST_WIDE_INT amount;
25471 unsigned long live_regs_mask = offsets->saved_regs_mask;
25472 /* Extract a mask of the ones we can give to the Thumb's push/pop
25473 instruction. */
25474 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25475 /* Then count how many other high registers will need to be pushed. */
25476 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25477 int n_free, reg_base, size;
25479 if (!for_prologue && frame_pointer_needed)
25480 amount = offsets->locals_base - offsets->saved_regs;
25481 else
25482 amount = offsets->outgoing_args - offsets->saved_regs;
25484 /* If the stack frame size is 512 exactly, we can save one load
25485 instruction, which should make this a win even when optimizing
25486 for speed. */
25487 if (!optimize_size && amount != 512)
25488 return 0;
25490 /* Can't do this if there are high registers to push. */
25491 if (high_regs_pushed != 0)
25492 return 0;
25494 /* Shouldn't do it in the prologue if no registers would normally
25495 be pushed at all. In the epilogue, also allow it if we'll have
25496 a pop insn for the PC. */
25497 if (l_mask == 0
25498 && (for_prologue
25499 || TARGET_BACKTRACE
25500 || (live_regs_mask & 1 << LR_REGNUM) == 0
25501 || TARGET_INTERWORK
25502 || crtl->args.pretend_args_size != 0))
25503 return 0;
25505 /* Don't do this if thumb_expand_prologue wants to emit instructions
25506 between the push and the stack frame allocation. */
25507 if (for_prologue
25508 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25509 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25510 return 0;
25512 reg_base = 0;
25513 n_free = 0;
25514 if (!for_prologue)
25516 size = arm_size_return_regs ();
25517 reg_base = ARM_NUM_INTS (size);
25518 live_regs_mask >>= reg_base;
25521 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25522 && (for_prologue || call_used_regs[reg_base + n_free]))
25524 live_regs_mask >>= 1;
25525 n_free++;
25528 if (n_free == 0)
25529 return 0;
25530 gcc_assert (amount / 4 * 4 == amount);
25532 if (amount >= 512 && (amount - n_free * 4) < 512)
25533 return (amount - 508) / 4;
25534 if (amount <= n_free * 4)
25535 return amount / 4;
25536 return 0;
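/* Illustrative note (not from the original source): Thumb-1 "sub sp, #imm"
   only encodes multiples of 4 up to 508, so when the frame is just over that
   limit it is cheaper to push a few extra call-clobbered registers than to
   load the adjustment from the constant pool.  For example, with
   amount == 516 and n_free == 3 the function returns (516 - 508) / 4 == 2:
   pushing two spare registers shrinks the remaining adjustment to 508, which
   fits a single instruction.  */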
25539 /* The bits which aren't usefully expanded as rtl. */
25540 const char *
25541 thumb1_unexpanded_epilogue (void)
25543 arm_stack_offsets *offsets;
25544 int regno;
25545 unsigned long live_regs_mask = 0;
25546 int high_regs_pushed = 0;
25547 int extra_pop;
25548 int had_to_push_lr;
25549 int size;
25551 if (cfun->machine->return_used_this_function != 0)
25552 return "";
25554 if (IS_NAKED (arm_current_func_type ()))
25555 return "";
25557 offsets = arm_get_frame_offsets ();
25558 live_regs_mask = offsets->saved_regs_mask;
25559 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25561 /* We can deduce the registers used from the function's return value.
25562 This is more reliable than examining df_regs_ever_live_p () because that
25563 will be set if the register is ever used in the function, not just if
25564 the register is used to hold a return value. */
25565 size = arm_size_return_regs ();
25567 extra_pop = thumb1_extra_regs_pushed (offsets, false);
25568 if (extra_pop > 0)
25570 unsigned long extra_mask = (1 << extra_pop) - 1;
25571 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
25574 /* The prologue may have pushed some high registers to use as
25575 work registers. e.g. the testsuite file:
25576 gcc/testsuite/gcc.c-torture/execute/complex-2.c
25577 compiles to produce:
25578 push {r4, r5, r6, r7, lr}
25579 mov r7, r9
25580 mov r6, r8
25581 push {r6, r7}
25582 as part of the prologue. We have to undo that pushing here. */
25584 if (high_regs_pushed)
25586 unsigned long mask = live_regs_mask & 0xff;
25587 int next_hi_reg;
25589 /* The available low registers depend on the size of the value we are
25590 returning. */
25591 if (size <= 12)
25592 mask |= 1 << 3;
25593 if (size <= 8)
25594 mask |= 1 << 2;
25596 if (mask == 0)
25597 /* Oh dear! We have no low registers into which we can pop
25598 high registers! */
25599 internal_error
25600 ("no low registers available for popping high registers");
25602 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
25603 if (live_regs_mask & (1 << next_hi_reg))
25604 break;
25606 while (high_regs_pushed)
25608 /* Find lo register(s) into which the high register(s) can
25609 be popped. */
25610 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
25612 if (mask & (1 << regno))
25613 high_regs_pushed--;
25614 if (high_regs_pushed == 0)
25615 break;
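/* Illustrative note (not from the original source): (2 << regno) - 1 keeps
   bits 0..regno, so if the search above stopped at regno == 2 the mask is
   trimmed to r0-r2 and the pop below only uses the low registers that were
   actually matched to high registers.  */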
25618 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
25620 /* Pop the values into the low register(s). */
25621 thumb_pop (asm_out_file, mask);
25623 /* Move the value(s) into the high registers. */
25624 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
25626 if (mask & (1 << regno))
25628 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
25629 regno);
25631 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
25632 if (live_regs_mask & (1 << next_hi_reg))
25633 break;
25637 live_regs_mask &= ~0x0f00;
25640 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
25641 live_regs_mask &= 0xff;
25643 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
25645 /* Pop the return address into the PC. */
25646 if (had_to_push_lr)
25647 live_regs_mask |= 1 << PC_REGNUM;
25649 /* Either no argument registers were pushed or a backtrace
25650 structure was created which includes an adjusted stack
25651 pointer, so just pop everything. */
25652 if (live_regs_mask)
25653 thumb_pop (asm_out_file, live_regs_mask);
25655 /* We have either just popped the return address into the
25656 PC or it was kept in LR for the entire function.
25657 Note that thumb_pop has already called thumb_exit if the
25658 PC was in the list. */
25659 if (!had_to_push_lr)
25660 thumb_exit (asm_out_file, LR_REGNUM);
25662 else
25664 /* Pop everything but the return address. */
25665 if (live_regs_mask)
25666 thumb_pop (asm_out_file, live_regs_mask);
25668 if (had_to_push_lr)
25670 if (size > 12)
25672 /* We have no free low regs, so save one. */
25673 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
25674 LAST_ARG_REGNUM);
25677 /* Get the return address into a temporary register. */
25678 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
25680 if (size > 12)
25682 /* Move the return address to lr. */
25683 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
25684 LAST_ARG_REGNUM);
25685 /* Restore the low register. */
25686 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25687 IP_REGNUM);
25688 regno = LR_REGNUM;
25690 else
25691 regno = LAST_ARG_REGNUM;
25693 else
25694 regno = LR_REGNUM;
25696 /* Remove the argument registers that were pushed onto the stack. */
25697 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25698 SP_REGNUM, SP_REGNUM,
25699 crtl->args.pretend_args_size);
25701 thumb_exit (asm_out_file, regno);
25704 return "";
25707 /* Functions to save and restore machine-specific function data. */
25708 static struct machine_function *
25709 arm_init_machine_status (void)
25711 struct machine_function *machine;
25712 machine = ggc_alloc_cleared_machine_function ();
25714 #if ARM_FT_UNKNOWN != 0
25715 machine->func_type = ARM_FT_UNKNOWN;
25716 #endif
25717 return machine;
25720 /* Return an RTX indicating where the return address to the
25721 calling function can be found. */
25723 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25725 if (count != 0)
25726 return NULL_RTX;
25728 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25731 /* Do anything needed before RTL is emitted for each function. */
25732 void
25733 arm_init_expanders (void)
25735 /* Arrange to initialize and mark the machine per-function status. */
25736 init_machine_status = arm_init_machine_status;
25738 /* This is to stop the combine pass optimizing away the alignment
25739 adjustment of va_arg. */
25740 /* ??? It is claimed that this should not be necessary. */
25741 if (cfun)
25742 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25746 /* Like arm_compute_initial_elimination_offset. Simpler because there
25747 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
25748 to point at the base of the local variables after static stack
25749 space for a function has been allocated. */
25751 HOST_WIDE_INT
25752 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25754 arm_stack_offsets *offsets;
25756 offsets = arm_get_frame_offsets ();
25758 switch (from)
25760 case ARG_POINTER_REGNUM:
25761 switch (to)
25763 case STACK_POINTER_REGNUM:
25764 return offsets->outgoing_args - offsets->saved_args;
25766 case FRAME_POINTER_REGNUM:
25767 return offsets->soft_frame - offsets->saved_args;
25769 case ARM_HARD_FRAME_POINTER_REGNUM:
25770 return offsets->saved_regs - offsets->saved_args;
25772 case THUMB_HARD_FRAME_POINTER_REGNUM:
25773 return offsets->locals_base - offsets->saved_args;
25775 default:
25776 gcc_unreachable ();
25778 break;
25780 case FRAME_POINTER_REGNUM:
25781 switch (to)
25783 case STACK_POINTER_REGNUM:
25784 return offsets->outgoing_args - offsets->soft_frame;
25786 case ARM_HARD_FRAME_POINTER_REGNUM:
25787 return offsets->saved_regs - offsets->soft_frame;
25789 case THUMB_HARD_FRAME_POINTER_REGNUM:
25790 return offsets->locals_base - offsets->soft_frame;
25792 default:
25793 gcc_unreachable ();
25795 break;
25797 default:
25798 gcc_unreachable ();
25802 /* Generate the function's prologue. */
25804 void
25805 thumb1_expand_prologue (void)
25807 rtx insn;
25809 HOST_WIDE_INT amount;
25810 arm_stack_offsets *offsets;
25811 unsigned long func_type;
25812 int regno;
25813 unsigned long live_regs_mask;
25814 unsigned long l_mask;
25815 unsigned high_regs_pushed = 0;
25817 func_type = arm_current_func_type ();
25819 /* Naked functions don't have prologues. */
25820 if (IS_NAKED (func_type))
25821 return;
25823 if (IS_INTERRUPT (func_type))
25825 error ("interrupt Service Routines cannot be coded in Thumb mode");
25826 return;
25829 if (is_called_in_ARM_mode (current_function_decl))
25830 emit_insn (gen_prologue_thumb1_interwork ());
25832 offsets = arm_get_frame_offsets ();
25833 live_regs_mask = offsets->saved_regs_mask;
25835 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25836 l_mask = live_regs_mask & 0x40ff;
25837 /* Then count how many other high registers will need to be pushed. */
25838 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25840 if (crtl->args.pretend_args_size)
25842 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25844 if (cfun->machine->uses_anonymous_args)
25846 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25847 unsigned long mask;
25849 mask = 1ul << (LAST_ARG_REGNUM + 1);
25850 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
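/* Worked example (illustrative, not from the original source): with
   LAST_ARG_REGNUM == 3 and num_pushes == 2 this computes
   (1 << 4) - (1 << 2) == 0xc, the mask selecting r2 and r3, the highest
   NUM_PUSHES argument registers, which hold the trailing anonymous
   arguments.  */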
25852 insn = thumb1_emit_multi_reg_push (mask, 0);
25854 else
25856 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25857 stack_pointer_rtx, x));
25859 RTX_FRAME_RELATED_P (insn) = 1;
25862 if (TARGET_BACKTRACE)
25864 HOST_WIDE_INT offset = 0;
25865 unsigned work_register;
25866 rtx work_reg, x, arm_hfp_rtx;
25868 /* We have been asked to create a stack backtrace structure.
25869 The code looks like this:
25871 0 .align 2
25872 0 func:
25873 0 sub SP, #16 Reserve space for 4 registers.
25874 2 push {R7} Push low registers.
25875 4 add R7, SP, #20 Get the stack pointer before the push.
25876 6 str R7, [SP, #8] Store the stack pointer
25877 (before reserving the space).
25878 8 mov R7, PC Get hold of the start of this code + 12.
25879 10 str R7, [SP, #16] Store it.
25880 12 mov R7, FP Get hold of the current frame pointer.
25881 14 str R7, [SP, #4] Store it.
25882 16 mov R7, LR Get hold of the current return address.
25883 18 str R7, [SP, #12] Store it.
25884 20 add R7, SP, #16 Point at the start of the
25885 backtrace structure.
25886 22 mov FP, R7 Put this value into the frame pointer. */
25888 work_register = thumb_find_work_register (live_regs_mask);
25889 work_reg = gen_rtx_REG (SImode, work_register);
25890 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25892 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25893 stack_pointer_rtx, GEN_INT (-16)));
25894 RTX_FRAME_RELATED_P (insn) = 1;
25896 if (l_mask)
25898 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25899 RTX_FRAME_RELATED_P (insn) = 1;
25901 offset = bit_count (l_mask) * UNITS_PER_WORD;
25904 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25905 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25907 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25908 x = gen_frame_mem (SImode, x);
25909 emit_move_insn (x, work_reg);
25911 /* Make sure that the instruction fetching the PC is in the right place
25912 to calculate "start of backtrace creation code + 12". */
25913 /* ??? The stores using the common WORK_REG ought to be enough to
25914 prevent the scheduler from doing anything weird. Failing that
25915 we could always move all of the following into an UNSPEC_VOLATILE. */
25916 if (l_mask)
25918 x = gen_rtx_REG (SImode, PC_REGNUM);
25919 emit_move_insn (work_reg, x);
25921 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25922 x = gen_frame_mem (SImode, x);
25923 emit_move_insn (x, work_reg);
25925 emit_move_insn (work_reg, arm_hfp_rtx);
25927 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25928 x = gen_frame_mem (SImode, x);
25929 emit_move_insn (x, work_reg);
25931 else
25933 emit_move_insn (work_reg, arm_hfp_rtx);
25935 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25936 x = gen_frame_mem (SImode, x);
25937 emit_move_insn (x, work_reg);
25939 x = gen_rtx_REG (SImode, PC_REGNUM);
25940 emit_move_insn (work_reg, x);
25942 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25943 x = gen_frame_mem (SImode, x);
25944 emit_move_insn (x, work_reg);
25947 x = gen_rtx_REG (SImode, LR_REGNUM);
25948 emit_move_insn (work_reg, x);
25950 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25951 x = gen_frame_mem (SImode, x);
25952 emit_move_insn (x, work_reg);
25954 x = GEN_INT (offset + 12);
25955 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25957 emit_move_insn (arm_hfp_rtx, work_reg);
25959 /* Optimization: If we are not pushing any low registers but we are going
25960 to push some high registers then delay our first push. This will just
25961 be a push of LR and we can combine it with the push of the first high
25962 register. */
25963 else if ((l_mask & 0xff) != 0
25964 || (high_regs_pushed == 0 && l_mask))
25966 unsigned long mask = l_mask;
25967 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25968 insn = thumb1_emit_multi_reg_push (mask, mask);
25969 RTX_FRAME_RELATED_P (insn) = 1;
25972 if (high_regs_pushed)
25974 unsigned pushable_regs;
25975 unsigned next_hi_reg;
25976 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25977 : crtl->args.info.nregs;
25978 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25980 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25981 if (live_regs_mask & (1 << next_hi_reg))
25982 break;
25984 /* Here we need to mask out registers used for passing arguments, even
25985 if they could otherwise be pushed, to avoid using them to stash the high
25986 registers: doing so would clobber live argument values. */
25987 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
25989 if (pushable_regs == 0)
25990 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25992 while (high_regs_pushed > 0)
25994 unsigned long real_regs_mask = 0;
25996 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
25998 if (pushable_regs & (1 << regno))
26000 emit_move_insn (gen_rtx_REG (SImode, regno),
26001 gen_rtx_REG (SImode, next_hi_reg));
26003 high_regs_pushed --;
26004 real_regs_mask |= (1 << next_hi_reg);
26006 if (high_regs_pushed)
26008 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26009 next_hi_reg --)
26010 if (live_regs_mask & (1 << next_hi_reg))
26011 break;
26013 else
26015 pushable_regs &= ~((1 << regno) - 1);
26016 break;
26021 /* If we had to find a work register and we have not yet
26022 saved the LR then add it to the list of regs to push. */
26023 if (l_mask == (1 << LR_REGNUM))
26025 pushable_regs |= l_mask;
26026 real_regs_mask |= l_mask;
26027 l_mask = 0;
26030 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26031 RTX_FRAME_RELATED_P (insn) = 1;
26035 /* Load the pic register before setting the frame pointer,
26036 so we can use r7 as a temporary work register. */
26037 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26038 arm_load_pic_register (live_regs_mask);
26040 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26041 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26042 stack_pointer_rtx);
26044 if (flag_stack_usage_info)
26045 current_function_static_stack_size
26046 = offsets->outgoing_args - offsets->saved_args;
26048 amount = offsets->outgoing_args - offsets->saved_regs;
26049 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26050 if (amount)
26052 if (amount < 512)
26054 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26055 GEN_INT (- amount)));
26056 RTX_FRAME_RELATED_P (insn) = 1;
26058 else
26060 rtx reg, dwarf;
26062 /* The stack decrement is too big for an immediate value in a single
26063 insn. In theory we could issue multiple subtracts, but after
26064 three of them it becomes more space efficient to place the full
26065 value in the constant pool and load into a register. (Also the
26066 ARM debugger really likes to see only one stack decrement per
26067 function). So instead we look for a scratch register into which
26068 we can load the decrement, and then we subtract this from the
26069 stack pointer. Unfortunately on the thumb the only available
26070 scratch registers are the argument registers, and we cannot use
26071 these as they may hold arguments to the function. Instead we
26072 attempt to locate a call preserved register which is used by this
26073 function. If we can find one, then we know that it will have
26074 been pushed at the start of the prologue and so we can corrupt
26075 it now. */
26076 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26077 if (live_regs_mask & (1 << regno))
26078 break;
26080 gcc_assert(regno <= LAST_LO_REGNUM);
26082 reg = gen_rtx_REG (SImode, regno);
26084 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26086 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26087 stack_pointer_rtx, reg));
26089 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26090 plus_constant (Pmode, stack_pointer_rtx,
26091 -amount));
26092 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26093 RTX_FRAME_RELATED_P (insn) = 1;
26097 if (frame_pointer_needed)
26098 thumb_set_frame_pointer (offsets);
26100 /* If we are profiling, make sure no instructions are scheduled before
26101 the call to mcount. Similarly if the user has requested no
26102 scheduling in the prolog. Similarly if we want non-call exceptions
26103 using the EABI unwinder, to prevent faulting instructions from being
26104 swapped with a stack adjustment. */
26105 if (crtl->profile || !TARGET_SCHED_PROLOG
26106 || (arm_except_unwind_info (&global_options) == UI_TARGET
26107 && cfun->can_throw_non_call_exceptions))
26108 emit_insn (gen_blockage ());
26110 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26111 if (live_regs_mask & 0xff)
26112 cfun->machine->lr_save_eliminated = 0;
26115 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26116 single POP instruction can be generated. LR should be replaced by PC. All
26117 the checks required are already done by USE_RETURN_INSN (). Hence, all
26118 we really need to check here is whether a single register or multiple
26119 registers are being popped. */
26120 void
26121 thumb2_expand_return (bool simple_return)
26123 int i, num_regs;
26124 unsigned long saved_regs_mask;
26125 arm_stack_offsets *offsets;
26127 offsets = arm_get_frame_offsets ();
26128 saved_regs_mask = offsets->saved_regs_mask;
26130 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26131 if (saved_regs_mask & (1 << i))
26132 num_regs++;
26134 if (!simple_return && saved_regs_mask)
26136 if (num_regs == 1)
26138 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26139 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26140 rtx addr = gen_rtx_MEM (SImode,
26141 gen_rtx_POST_INC (SImode,
26142 stack_pointer_rtx));
26143 set_mem_alias_set (addr, get_frame_alias_set ());
26144 XVECEXP (par, 0, 0) = ret_rtx;
26145 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26146 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26147 emit_jump_insn (par);
26149 else
26151 saved_regs_mask &= ~ (1 << LR_REGNUM);
26152 saved_regs_mask |= (1 << PC_REGNUM);
26153 arm_emit_multi_reg_pop (saved_regs_mask);
26156 else
26158 emit_jump_insn (simple_return_rtx);
26162 void
26163 thumb1_expand_epilogue (void)
26165 HOST_WIDE_INT amount;
26166 arm_stack_offsets *offsets;
26167 int regno;
26169 /* Naked functions don't have epilogues. */
26170 if (IS_NAKED (arm_current_func_type ()))
26171 return;
26173 offsets = arm_get_frame_offsets ();
26174 amount = offsets->outgoing_args - offsets->saved_regs;
26176 if (frame_pointer_needed)
26178 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26179 amount = offsets->locals_base - offsets->saved_regs;
26181 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26183 gcc_assert (amount >= 0);
26184 if (amount)
26186 emit_insn (gen_blockage ());
26188 if (amount < 512)
26189 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26190 GEN_INT (amount)));
26191 else
26193 /* r3 is always free in the epilogue. */
26194 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
26196 emit_insn (gen_movsi (reg, GEN_INT (amount)));
26197 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
26201 /* Emit a USE (stack_pointer_rtx), so that
26202 the stack adjustment will not be deleted. */
26203 emit_insn (gen_force_register_use (stack_pointer_rtx));
26205 if (crtl->profile || !TARGET_SCHED_PROLOG)
26206 emit_insn (gen_blockage ());
26208 /* Emit a clobber for each register that will be restored in the epilogue,
26209 so that flow2 will get register lifetimes correct. */
26210 for (regno = 0; regno < 13; regno++)
26211 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
26212 emit_clobber (gen_rtx_REG (SImode, regno));
26214 if (! df_regs_ever_live_p (LR_REGNUM))
26215 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
26218 /* Epilogue code for APCS frame. */
26219 static void
26220 arm_expand_epilogue_apcs_frame (bool really_return)
26222 unsigned long func_type;
26223 unsigned long saved_regs_mask;
26224 int num_regs = 0;
26225 int i;
26226 int floats_from_frame = 0;
26227 arm_stack_offsets *offsets;
26229 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
26230 func_type = arm_current_func_type ();
26232 /* Get frame offsets for ARM. */
26233 offsets = arm_get_frame_offsets ();
26234 saved_regs_mask = offsets->saved_regs_mask;
26236 /* Find the offset of the floating-point save area in the frame. */
26237 floats_from_frame = offsets->saved_args - offsets->frame;
26239 /* Compute how many core registers are saved and how far away the floats are. */
26240 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26241 if (saved_regs_mask & (1 << i))
26243 num_regs++;
26244 floats_from_frame += 4;
26247 if (TARGET_HARD_FLOAT && TARGET_VFP)
26249 int start_reg;
26251 /* The offset is from IP_REGNUM. */
26252 int saved_size = arm_get_vfp_saved_size ();
26253 if (saved_size > 0)
26255 floats_from_frame += saved_size;
26256 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
26257 hard_frame_pointer_rtx,
26258 GEN_INT (-floats_from_frame)));
26261 /* Generate VFP register multi-pop. */
26262 start_reg = FIRST_VFP_REGNUM;
26264 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
26265 /* Look for a case where a reg does not need restoring. */
26266 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26267 && (!df_regs_ever_live_p (i + 1)
26268 || call_used_regs[i + 1]))
26270 if (start_reg != i)
26271 arm_emit_vfp_multi_reg_pop (start_reg,
26272 (i - start_reg) / 2,
26273 gen_rtx_REG (SImode,
26274 IP_REGNUM));
26275 start_reg = i + 2;
26278 /* Restore the remaining regs that we have discovered (or possibly
26279 even all of them, if the conditional in the for loop never
26280 fired). */
26281 if (start_reg != i)
26282 arm_emit_vfp_multi_reg_pop (start_reg,
26283 (i - start_reg) / 2,
26284 gen_rtx_REG (SImode, IP_REGNUM));
26287 if (TARGET_IWMMXT)
26289 /* The frame pointer is guaranteed to be non-double-word aligned, as
26290 it is set to double-word-aligned old_stack_pointer - 4. */
26291 rtx insn;
26292 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
26294 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
26295 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26297 rtx addr = gen_frame_mem (V2SImode,
26298 plus_constant (Pmode, hard_frame_pointer_rtx,
26299 - lrm_count * 4));
26300 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26301 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26302 gen_rtx_REG (V2SImode, i),
26303 NULL_RTX);
26304 lrm_count += 2;
26308 /* saved_regs_mask should contain IP, which holds the old stack pointer
26309 saved when the activation record was created. Since SP and IP are adjacent registers,
26310 we can restore the value directly into SP. */
26311 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
26312 saved_regs_mask &= ~(1 << IP_REGNUM);
26313 saved_regs_mask |= (1 << SP_REGNUM);
26315 /* There are two registers left in saved_regs_mask - LR and PC. We
26316 only need to restore LR (the return address), but to
26317 save time we can load it directly into PC, unless we need a
26318 special function exit sequence, or we are not really returning. */
26319 if (really_return
26320 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
26321 && !crtl->calls_eh_return)
26322 /* Delete LR from the register mask, so that LR on
26323 the stack is loaded into the PC in the register mask. */
26324 saved_regs_mask &= ~(1 << LR_REGNUM);
26325 else
26326 saved_regs_mask &= ~(1 << PC_REGNUM);
26328 num_regs = bit_count (saved_regs_mask);
26329 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
26331 /* Unwind the stack to just below the saved registers. */
26332 emit_insn (gen_addsi3 (stack_pointer_rtx,
26333 hard_frame_pointer_rtx,
26334 GEN_INT (- 4 * num_regs)));
26337 arm_emit_multi_reg_pop (saved_regs_mask);
26339 if (IS_INTERRUPT (func_type))
26341 /* Interrupt handlers will have pushed the
26342 IP onto the stack, so restore it now. */
26343 rtx insn;
26344 rtx addr = gen_rtx_MEM (SImode,
26345 gen_rtx_POST_INC (SImode,
26346 stack_pointer_rtx));
26347 set_mem_alias_set (addr, get_frame_alias_set ());
26348 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
26349 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26350 gen_rtx_REG (SImode, IP_REGNUM),
26351 NULL_RTX);
26354 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
26355 return;
26357 if (crtl->calls_eh_return)
26358 emit_insn (gen_addsi3 (stack_pointer_rtx,
26359 stack_pointer_rtx,
26360 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
26362 if (IS_STACKALIGN (func_type))
26363 /* Restore the original stack pointer. Before prologue, the stack was
26364 realigned and the original stack pointer saved in r0. For details,
26365 see comment in arm_expand_prologue. */
26366 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
26368 emit_jump_insn (simple_return_rtx);
26371 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
26372 function is not a sibcall. */
26373 void
26374 arm_expand_epilogue (bool really_return)
26376 unsigned long func_type;
26377 unsigned long saved_regs_mask;
26378 int num_regs = 0;
26379 int i;
26380 int amount;
26381 arm_stack_offsets *offsets;
26383 func_type = arm_current_func_type ();
26385 /* Naked functions don't have epilogues. Hence, generate a return pattern, and
26386 let output_return_instruction take care of instruction emission if any. */
26387 if (IS_NAKED (func_type)
26388 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
26390 if (really_return)
26391 emit_jump_insn (simple_return_rtx);
26392 return;
26395 /* If we are throwing an exception, then we really must be doing a
26396 return, so we can't tail-call. */
26397 gcc_assert (!crtl->calls_eh_return || really_return);
26399 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
26401 arm_expand_epilogue_apcs_frame (really_return);
26402 return;
26405 /* Get frame offsets for ARM. */
26406 offsets = arm_get_frame_offsets ();
26407 saved_regs_mask = offsets->saved_regs_mask;
26408 num_regs = bit_count (saved_regs_mask);
26410 if (frame_pointer_needed)
26412 rtx insn;
26413 /* Restore stack pointer if necessary. */
26414 if (TARGET_ARM)
26416 /* In ARM mode, the frame pointer points to the first saved register.
26417 Restore the stack pointer to the last saved register. */
26418 amount = offsets->frame - offsets->saved_regs;
26420 /* Force out any pending memory operations that reference stacked data
26421 before stack de-allocation occurs. */
26422 emit_insn (gen_blockage ());
26423 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26424 hard_frame_pointer_rtx,
26425 GEN_INT (amount)));
26426 arm_add_cfa_adjust_cfa_note (insn, amount,
26427 stack_pointer_rtx,
26428 hard_frame_pointer_rtx);
26430 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26431 deleted. */
26432 emit_insn (gen_force_register_use (stack_pointer_rtx));
26434 else
26436 /* In Thumb-2 mode, the frame pointer points to the last saved
26437 register. */
26438 amount = offsets->locals_base - offsets->saved_regs;
26439 if (amount)
26441 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
26442 hard_frame_pointer_rtx,
26443 GEN_INT (amount)));
26444 arm_add_cfa_adjust_cfa_note (insn, amount,
26445 hard_frame_pointer_rtx,
26446 hard_frame_pointer_rtx);
26449 /* Force out any pending memory operations that reference stacked data
26450 before stack de-allocation occurs. */
26451 emit_insn (gen_blockage ());
26452 insn = emit_insn (gen_movsi (stack_pointer_rtx,
26453 hard_frame_pointer_rtx));
26454 arm_add_cfa_adjust_cfa_note (insn, 0,
26455 stack_pointer_rtx,
26456 hard_frame_pointer_rtx);
26457 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26458 deleted. */
26459 emit_insn (gen_force_register_use (stack_pointer_rtx));
26462 else
26464 /* Pop off outgoing args and local frame to adjust stack pointer to
26465 last saved register. */
26466 amount = offsets->outgoing_args - offsets->saved_regs;
26467 if (amount)
26469 rtx tmp;
26470 /* Force out any pending memory operations that reference stacked data
26471 before stack de-allocation occurs. */
26472 emit_insn (gen_blockage ());
26473 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26474 stack_pointer_rtx,
26475 GEN_INT (amount)));
26476 arm_add_cfa_adjust_cfa_note (tmp, amount,
26477 stack_pointer_rtx, stack_pointer_rtx);
26478 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26479 not deleted. */
26480 emit_insn (gen_force_register_use (stack_pointer_rtx));
26484 if (TARGET_HARD_FLOAT && TARGET_VFP)
26486 /* Generate VFP register multi-pop. */
26487 int end_reg = LAST_VFP_REGNUM + 1;
26489 /* Scan the registers in reverse order. We need to match
26490 any groupings made in the prologue and generate matching
26491 vldm operations. We need to match the groups because,
26492 unlike pop, vldm can only restore consecutive regs. */
26493 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
26494 /* Look for a case where a reg does not need restoring. */
26495 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26496 && (!df_regs_ever_live_p (i + 1)
26497 || call_used_regs[i + 1]))
26499 /* Restore the regs discovered so far (from reg+2 to
26500 end_reg). */
26501 if (end_reg > i + 2)
26502 arm_emit_vfp_multi_reg_pop (i + 2,
26503 (end_reg - (i + 2)) / 2,
26504 stack_pointer_rtx);
26505 end_reg = i;
26508 /* Restore the remaining regs that we have discovered (or possibly
26509 even all of them, if the conditional in the for loop never
26510 fired). */
26511 if (end_reg > i + 2)
26512 arm_emit_vfp_multi_reg_pop (i + 2,
26513 (end_reg - (i + 2)) / 2,
26514 stack_pointer_rtx);
26517 if (TARGET_IWMMXT)
26518 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26519 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26521 rtx insn;
26522 rtx addr = gen_rtx_MEM (V2SImode,
26523 gen_rtx_POST_INC (SImode,
26524 stack_pointer_rtx));
26525 set_mem_alias_set (addr, get_frame_alias_set ());
26526 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26527 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26528 gen_rtx_REG (V2SImode, i),
26529 NULL_RTX);
26530 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26531 stack_pointer_rtx, stack_pointer_rtx);
26534 if (saved_regs_mask)
26536 rtx insn;
26537 bool return_in_pc = false;
26539 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26540 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26541 && !IS_STACKALIGN (func_type)
26542 && really_return
26543 && crtl->args.pretend_args_size == 0
26544 && saved_regs_mask & (1 << LR_REGNUM)
26545 && !crtl->calls_eh_return)
26547 saved_regs_mask &= ~(1 << LR_REGNUM);
26548 saved_regs_mask |= (1 << PC_REGNUM);
26549 return_in_pc = true;
26552 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26554 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26555 if (saved_regs_mask & (1 << i))
26557 rtx addr = gen_rtx_MEM (SImode,
26558 gen_rtx_POST_INC (SImode,
26559 stack_pointer_rtx));
26560 set_mem_alias_set (addr, get_frame_alias_set ());
26562 if (i == PC_REGNUM)
26564 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26565 XVECEXP (insn, 0, 0) = ret_rtx;
26566 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
26567 gen_rtx_REG (SImode, i),
26568 addr);
26569 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26570 insn = emit_jump_insn (insn);
26572 else
26574 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26575 addr));
26576 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26577 gen_rtx_REG (SImode, i),
26578 NULL_RTX);
26579 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26580 stack_pointer_rtx,
26581 stack_pointer_rtx);
26585 else
26587 if (TARGET_LDRD
26588 && current_tune->prefer_ldrd_strd
26589 && !optimize_function_for_size_p (cfun))
26591 if (TARGET_THUMB2)
26592 thumb2_emit_ldrd_pop (saved_regs_mask);
26593 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26594 arm_emit_ldrd_pop (saved_regs_mask);
26595 else
26596 arm_emit_multi_reg_pop (saved_regs_mask);
26598 else
26599 arm_emit_multi_reg_pop (saved_regs_mask);
26602 if (return_in_pc == true)
26603 return;
26606 if (crtl->args.pretend_args_size)
26608 int i, j;
26609 rtx dwarf = NULL_RTX;
26610 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26611 stack_pointer_rtx,
26612 GEN_INT (crtl->args.pretend_args_size)));
26614 RTX_FRAME_RELATED_P (tmp) = 1;
26616 if (cfun->machine->uses_anonymous_args)
26618 /* Restore pretend args. Refer to arm_expand_prologue for how the
26619 pretend args are saved on the stack. */
26620 int num_regs = crtl->args.pretend_args_size / 4;
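/* Worked example (illustrative, not from the original source): for
   num_regs == 2 the next line computes (0xf0 >> 2) & 0xf == 0xc, i.e.
   r2 and r3, the same highest argument registers the prologue pushed for
   the anonymous arguments.  */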
26621 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26622 for (j = 0, i = 0; j < num_regs; i++)
26623 if (saved_regs_mask & (1 << i))
26625 rtx reg = gen_rtx_REG (SImode, i);
26626 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26627 j++;
26629 REG_NOTES (tmp) = dwarf;
26631 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
26632 stack_pointer_rtx, stack_pointer_rtx);
26635 if (!really_return)
26636 return;
26638 if (crtl->calls_eh_return)
26639 emit_insn (gen_addsi3 (stack_pointer_rtx,
26640 stack_pointer_rtx,
26641 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26643 if (IS_STACKALIGN (func_type))
26644 /* Restore the original stack pointer. Before prologue, the stack was
26645 realigned and the original stack pointer saved in r0. For details,
26646 see comment in arm_expand_prologue. */
26647 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
26649 emit_jump_insn (simple_return_rtx);
26652 /* Implementation of insn prologue_thumb1_interwork. This is the first
26653 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26655 const char *
26656 thumb1_output_interwork (void)
26658 const char * name;
26659 FILE *f = asm_out_file;
26661 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26662 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26663 == SYMBOL_REF);
26664 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26666 /* Generate code sequence to switch us into Thumb mode. */
26667 /* The .code 32 directive has already been emitted by
26668 ASM_DECLARE_FUNCTION_NAME. */
26669 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26670 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26672 /* Generate a label, so that the debugger will notice the
26673 change in instruction sets. This label is also used by
26674 the assembler to bypass the ARM code when this function
26675 is called from a Thumb encoded function elsewhere in the
26676 same file. Hence the definition of STUB_NAME here must
26677 agree with the definition in gas/config/tc-arm.c. */
26679 #define STUB_NAME ".real_start_of"
26681 fprintf (f, "\t.code\t16\n");
26682 #ifdef ARM_PE
26683 if (arm_dllexport_name_p (name))
26684 name = arm_strip_name_encoding (name);
26685 #endif
26686 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26687 fprintf (f, "\t.thumb_func\n");
26688 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26690 return "";
26693 /* Handle the case of a double word load into a low register from
26694 a computed memory address. The computed address may involve a
26695 register which is overwritten by the load. */
26696 const char *
26697 thumb_load_double_from_address (rtx *operands)
26699 rtx addr;
26700 rtx base;
26701 rtx offset;
26702 rtx arg1;
26703 rtx arg2;
26705 gcc_assert (REG_P (operands[0]));
26706 gcc_assert (MEM_P (operands[1]));
26708 /* Get the memory address. */
26709 addr = XEXP (operands[1], 0);
26711 /* Work out how the memory address is computed. */
26712 switch (GET_CODE (addr))
26714 case REG:
26715 operands[2] = adjust_address (operands[1], SImode, 4);
26717 if (REGNO (operands[0]) == REGNO (addr))
26719 output_asm_insn ("ldr\t%H0, %2", operands);
26720 output_asm_insn ("ldr\t%0, %1", operands);
26722 else
26724 output_asm_insn ("ldr\t%0, %1", operands);
26725 output_asm_insn ("ldr\t%H0, %2", operands);
26727 break;
26729 case CONST:
26730 /* Compute <address> + 4 for the high order load. */
26731 operands[2] = adjust_address (operands[1], SImode, 4);
26733 output_asm_insn ("ldr\t%0, %1", operands);
26734 output_asm_insn ("ldr\t%H0, %2", operands);
26735 break;
26737 case PLUS:
26738 arg1 = XEXP (addr, 0);
26739 arg2 = XEXP (addr, 1);
26741 if (CONSTANT_P (arg1))
26742 base = arg2, offset = arg1;
26743 else
26744 base = arg1, offset = arg2;
26746 gcc_assert (REG_P (base));
26748 /* Catch the case of <address> = <reg> + <reg> */
26749 if (REG_P (offset))
26751 int reg_offset = REGNO (offset);
26752 int reg_base = REGNO (base);
26753 int reg_dest = REGNO (operands[0]);
26755 /* Add the base and offset registers together into the
26756 higher destination register. */
26757 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26758 reg_dest + 1, reg_base, reg_offset);
26760 /* Load the lower destination register from the address in
26761 the higher destination register. */
26762 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26763 reg_dest, reg_dest + 1);
26765 /* Load the higher destination register from its own address
26766 plus 4. */
26767 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26768 reg_dest + 1, reg_dest + 1);
26770 else
26772 /* Compute <address> + 4 for the high order load. */
26773 operands[2] = adjust_address (operands[1], SImode, 4);
26775 /* If the computed address is held in the low order register
26776 then load the high order register first, otherwise always
26777 load the low order register first. */
26778 if (REGNO (operands[0]) == REGNO (base))
26780 output_asm_insn ("ldr\t%H0, %2", operands);
26781 output_asm_insn ("ldr\t%0, %1", operands);
26783 else
26785 output_asm_insn ("ldr\t%0, %1", operands);
26786 output_asm_insn ("ldr\t%H0, %2", operands);
26789 break;
26791 case LABEL_REF:
26792 /* With no registers to worry about we can just load the value
26793 directly. */
26794 operands[2] = adjust_address (operands[1], SImode, 4);
26796 output_asm_insn ("ldr\t%H0, %2", operands);
26797 output_asm_insn ("ldr\t%0, %1", operands);
26798 break;
26800 default:
26801 gcc_unreachable ();
26804 return "";
26807 const char *
26808 thumb_output_move_mem_multiple (int n, rtx *operands)
26810 rtx tmp;
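  /* The ldmia/stmia register lists emitted below must name the registers
     in ascending order, so the swaps in each case sort operands 4-6
     before the instructions are printed.  */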
26812 switch (n)
26814 case 2:
26815 if (REGNO (operands[4]) > REGNO (operands[5]))
26817 tmp = operands[4];
26818 operands[4] = operands[5];
26819 operands[5] = tmp;
26821 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26822 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26823 break;
26825 case 3:
26826 if (REGNO (operands[4]) > REGNO (operands[5]))
26828 tmp = operands[4];
26829 operands[4] = operands[5];
26830 operands[5] = tmp;
26832 if (REGNO (operands[5]) > REGNO (operands[6]))
26834 tmp = operands[5];
26835 operands[5] = operands[6];
26836 operands[6] = tmp;
26838 if (REGNO (operands[4]) > REGNO (operands[5]))
26840 tmp = operands[4];
26841 operands[4] = operands[5];
26842 operands[5] = tmp;
26845 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26846 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26847 break;
26849 default:
26850 gcc_unreachable ();
26853 return "";
26856 /* Output a call-via instruction for thumb state. */
26857 const char *
26858 thumb_call_via_reg (rtx reg)
26860 int regno = REGNO (reg);
26861 rtx *labelp;
26863 gcc_assert (regno < LR_REGNUM);
26865 /* If we are in the normal text section we can use a single instance
26866 per compilation unit. If we are doing function sections, then we need
26867 an entry per section, since we can't rely on reachability. */
26868 if (in_section == text_section)
26870 thumb_call_reg_needed = 1;
26872 if (thumb_call_via_label[regno] == NULL)
26873 thumb_call_via_label[regno] = gen_label_rtx ();
26874 labelp = thumb_call_via_label + regno;
26876 else
26878 if (cfun->machine->call_via[regno] == NULL)
26879 cfun->machine->call_via[regno] = gen_label_rtx ();
26880 labelp = cfun->machine->call_via + regno;
26883 output_asm_insn ("bl\t%a0", labelp);
26884 return "";
26887 /* Routines for generating rtl. */
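/* Expand a block copy of INTVAL (operands[2]) bytes: emit the 12-byte and
   8-byte block-move patterns while enough bytes remain, then finish with
   word, halfword and byte moves.  A 23-byte copy, for instance, becomes a
   12-byte block, an 8-byte block, a halfword and a byte.  */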
26888 void
26889 thumb_expand_movmemqi (rtx *operands)
26891 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26892 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26893 HOST_WIDE_INT len = INTVAL (operands[2]);
26894 HOST_WIDE_INT offset = 0;
26896 while (len >= 12)
26898 emit_insn (gen_movmem12b (out, in, out, in));
26899 len -= 12;
26902 if (len >= 8)
26904 emit_insn (gen_movmem8b (out, in, out, in));
26905 len -= 8;
26908 if (len >= 4)
26910 rtx reg = gen_reg_rtx (SImode);
26911 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26912 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26913 len -= 4;
26914 offset += 4;
26917 if (len >= 2)
26919 rtx reg = gen_reg_rtx (HImode);
26920 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26921 plus_constant (Pmode, in,
26922 offset))));
26923 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26924 offset)),
26925 reg));
26926 len -= 2;
26927 offset += 2;
26930 if (len)
26932 rtx reg = gen_reg_rtx (QImode);
26933 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26934 plus_constant (Pmode, in,
26935 offset))));
26936 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26937 offset)),
26938 reg));
26942 void
26943 thumb_reload_out_hi (rtx *operands)
26945 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26948 /* Handle reading a half-word from memory during reload. */
26949 void
26950 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
26952 gcc_unreachable ();
26955 /* Return the length of a function name prefix
26956 that starts with the character 'c'. */
26957 static int
26958 arm_get_strip_length (int c)
26960 switch (c)
26962 ARM_NAME_ENCODING_LENGTHS
26963 default: return 0;
26967 /* Return a pointer to a function's name with any
26968 and all prefix encodings stripped from it. */
26969 const char *
26970 arm_strip_name_encoding (const char *name)
26972 int skip;
26974 while ((skip = arm_get_strip_length (* name)))
26975 name += skip;
26977 return name;
26980 /* If there is a '*' anywhere in the name's prefix, then
26981 emit the stripped name verbatim, otherwise prepend an
26982 underscore if leading underscores are being used. */
26983 void
26984 arm_asm_output_labelref (FILE *stream, const char *name)
26986 int skip;
26987 int verbatim = 0;
26989 while ((skip = arm_get_strip_length (* name)))
26991 verbatim |= (*name == '*');
26992 name += skip;
26995 if (verbatim)
26996 fputs (name, stream);
26997 else
26998 asm_fprintf (stream, "%U%s", name);
27001 /* This function is used to emit an EABI tag and its associated value.
27002 We emit the numerical value of the tag in case the assembler does not
27003 support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
27004 the tag name in a comment so that anyone reading the assembler output
27005 will know which tag is being set.
27007 This function is not static because arm-c.c needs it too. */
27009 void
27010 arm_emit_eabi_attribute (const char *name, int num, int val)
27012 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27013 if (flag_verbose_asm || flag_debug_asm)
27014 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27015 asm_fprintf (asm_out_file, "\n");
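/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) emits
       .eabi_attribute 26, 1
   followed by the tag name as an assembler comment (via ASM_COMMENT_START)
   when -fverbose-asm or -dA is in effect.  */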
27018 static void
27019 arm_file_start (void)
27021 int val;
27023 if (TARGET_UNIFIED_ASM)
27024 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27026 if (TARGET_BPABI)
27028 const char *fpu_name;
27029 if (arm_selected_arch)
27030 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27031 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27032 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27033 else
27034 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
27036 if (TARGET_SOFT_FLOAT)
27038 fpu_name = "softvfp";
27040 else
27042 fpu_name = arm_fpu_desc->name;
27043 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27045 if (TARGET_HARD_FLOAT)
27046 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27047 if (TARGET_HARD_FLOAT_ABI)
27048 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27051 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27053 /* Some of these attributes only apply when the corresponding features
27054 are used. However we don't have any easy way of figuring this out.
27055 Conservatively record the setting that would have been used. */
27057 if (flag_rounding_math)
27058 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27060 if (!flag_unsafe_math_optimizations)
27062 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27063 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27065 if (flag_signaling_nans)
27066 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27068 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27069 flag_finite_math_only ? 1 : 3);
27071 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27072 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27073 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27074 flag_short_enums ? 1 : 2);
27076 /* Tag_ABI_optimization_goals. */
27077 if (optimize_size)
27078 val = 4;
27079 else if (optimize >= 2)
27080 val = 2;
27081 else if (optimize)
27082 val = 1;
27083 else
27084 val = 6;
27085 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27087 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27088 unaligned_access);
27090 if (arm_fp16_format)
27091 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27092 (int) arm_fp16_format);
27094 if (arm_lang_output_object_attributes_hook)
27095 arm_lang_output_object_attributes_hook();
27098 default_file_start ();
27101 static void
27102 arm_file_end (void)
27104 int regno;
27106 if (NEED_INDICATE_EXEC_STACK)
27107 /* Add .note.GNU-stack. */
27108 file_end_indicate_exec_stack ();
27110 if (! thumb_call_reg_needed)
27111 return;
27113 switch_to_section (text_section);
27114 asm_fprintf (asm_out_file, "\t.code 16\n");
27115 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27117 for (regno = 0; regno < LR_REGNUM; regno++)
27119 rtx label = thumb_call_via_label[regno];
27121 if (label != 0)
27123 targetm.asm_out.internal_label (asm_out_file, "L",
27124 CODE_LABEL_NUMBER (label));
27125 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27130 #ifndef ARM_PE
27131 /* Symbols in the text segment can be accessed without indirecting via the
27132 constant pool; it may take an extra binary operation, but this is still
27133 faster than indirecting via memory. Don't do this when not optimizing,
27134 since we won't be calculating all of the offsets necessary to do this
27135 simplification. */
27137 static void
27138 arm_encode_section_info (tree decl, rtx rtl, int first)
27140 if (optimize > 0 && TREE_CONSTANT (decl))
27141 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27143 default_encode_section_info (decl, rtl, first);
27145 #endif /* !ARM_PE */
27147 static void
27148 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27150 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27151 && !strcmp (prefix, "L"))
27153 arm_ccfsm_state = 0;
27154 arm_target_insn = NULL;
27156 default_internal_label (stream, prefix, labelno);
27159 /* Output code to add DELTA to the first argument, and then jump
27160 to FUNCTION. Used for C++ multiple inheritance. */
27161 static void
27162 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
27163 HOST_WIDE_INT delta,
27164 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
27165 tree function)
27167 static int thunk_label = 0;
27168 char label[256];
27169 char labelpc[256];
27170 int mi_delta = delta;
27171 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27172 int shift = 0;
27173 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27174 ? 1 : 0);
27175 if (mi_delta < 0)
27176 mi_delta = - mi_delta;
27178 final_start_function (emit_barrier (), file, 1);
27180 if (TARGET_THUMB1)
27182 int labelno = thunk_label++;
27183 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27184 /* Thunks are entered in ARM mode when available. */
27185 if (TARGET_THUMB1_ONLY)
27187 /* push r3 so we can use it as a temporary. */
27188 /* TODO: Omit this save if r3 is not used. */
27189 fputs ("\tpush {r3}\n", file);
27190 fputs ("\tldr\tr3, ", file);
27192 else
27194 fputs ("\tldr\tr12, ", file);
27196 assemble_name (file, label);
27197 fputc ('\n', file);
27198 if (flag_pic)
27200 /* If we are generating PIC, the ldr instruction below loads
27201 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27202 the address of the add + 8, so we have:
27204 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27205 = target + 1.
27207 Note that we have "+ 1" because some versions of GNU ld
27208 don't set the low bit of the result for R_ARM_REL32
27209 relocations against thumb function symbols.
27210 On ARMv6M this is +4, not +8. */
27211 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
27212 assemble_name (file, labelpc);
27213 fputs (":\n", file);
27214 if (TARGET_THUMB1_ONLY)
27216 /* This is 2 insns after the start of the thunk, so we know it
27217 is 4-byte aligned. */
27218 fputs ("\tadd\tr3, pc, r3\n", file);
27219 fputs ("\tmov r12, r3\n", file);
27221 else
27222 fputs ("\tadd\tr12, pc, r12\n", file);
27224 else if (TARGET_THUMB1_ONLY)
27225 fputs ("\tmov r12, r3\n", file);
27227 if (TARGET_THUMB1_ONLY)
27229 if (mi_delta > 255)
27231 fputs ("\tldr\tr3, ", file);
27232 assemble_name (file, label);
27233 fputs ("+4\n", file);
27234 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
27235 mi_op, this_regno, this_regno);
27237 else if (mi_delta != 0)
27239 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27240 mi_op, this_regno, this_regno,
27241 mi_delta);
27244 else
27246 /* TODO: Use movw/movt for large constants when available. */
27247 while (mi_delta != 0)
27249 if ((mi_delta & (3 << shift)) == 0)
27250 shift += 2;
27251 else
27253 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27254 mi_op, this_regno, this_regno,
27255 mi_delta & (0xff << shift));
27256 mi_delta &= ~(0xff << shift);
27257 shift += 8;
27261 if (TARGET_THUMB1)
27263 if (TARGET_THUMB1_ONLY)
27264 fputs ("\tpop\t{r3}\n", file);
27266 fprintf (file, "\tbx\tr12\n");
27267 ASM_OUTPUT_ALIGN (file, 2);
27268 assemble_name (file, label);
27269 fputs (":\n", file);
27270 if (flag_pic)
27272 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
27273 rtx tem = XEXP (DECL_RTL (function), 0);
27274 tem = plus_constant (GET_MODE (tem), tem, -7);
27275 tem = gen_rtx_MINUS (GET_MODE (tem),
27276 tem,
27277 gen_rtx_SYMBOL_REF (Pmode,
27278 ggc_strdup (labelpc)));
27279 assemble_integer (tem, 4, BITS_PER_WORD, 1);
27281 else
27282 /* Output ".word .LTHUNKn". */
27283 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
27285 if (TARGET_THUMB1_ONLY && mi_delta > 255)
27286 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
27288 else
27290 fputs ("\tb\t", file);
27291 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
27292 if (NEED_PLT_RELOC)
27293 fputs ("(PLT)", file);
27294 fputc ('\n', file);
27297 final_end_function ();
27300 int
27301 arm_emit_vector_const (FILE *file, rtx x)
27303 int i;
27304 const char * pattern;
27306 gcc_assert (GET_CODE (x) == CONST_VECTOR);
27308 switch (GET_MODE (x))
27310 case V2SImode: pattern = "%08x"; break;
27311 case V4HImode: pattern = "%04x"; break;
27312 case V8QImode: pattern = "%02x"; break;
27313 default: gcc_unreachable ();
27316 fprintf (file, "0x");
27317 for (i = CONST_VECTOR_NUNITS (x); i--;)
27319 rtx element;
27321 element = CONST_VECTOR_ELT (x, i);
27322 fprintf (file, pattern, INTVAL (element));
27325 return 1;
27328 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27329 HFmode constant pool entries are actually loaded with ldr. */
27330 void
27331 arm_emit_fp16_const (rtx c)
27333 REAL_VALUE_TYPE r;
27334 long bits;
27336 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
27337 bits = real_to_target (NULL, &r, HFmode);
27338 if (WORDS_BIG_ENDIAN)
27339 assemble_zeros (2);
27340 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27341 if (!WORDS_BIG_ENDIAN)
27342 assemble_zeros (2);
27345 const char *
27346 arm_output_load_gr (rtx *operands)
27348 rtx reg;
27349 rtx offset;
27350 rtx wcgr;
27351 rtx sum;
27353 if (!MEM_P (operands [1])
27354 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27355 || !REG_P (reg = XEXP (sum, 0))
27356 || !CONST_INT_P (offset = XEXP (sum, 1))
27357 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27358 return "wldrw%?\t%0, %1";
27360 /* Fix up an out-of-range load of a GR register. */
27361 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27362 wcgr = operands[0];
27363 operands[0] = reg;
27364 output_asm_insn ("ldr%?\t%0, %1", operands);
27366 operands[0] = wcgr;
27367 operands[1] = reg;
27368 output_asm_insn ("tmcr%?\t%0, %1", operands);
27369 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27371 return "";
27374 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27376 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27377 named arg and all anonymous args onto the stack.
27378 XXX I know the prologue shouldn't be pushing registers, but it is faster
27379 that way. */
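/* For instance, for "int f (int n, ...)" under AAPCS only r0 is consumed
   by the named argument, so nregs is 1 and PRETEND_SIZE becomes
   3 * UNITS_PER_WORD, making the prologue push r1-r3.  */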
27381 static void
27382 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27383 enum machine_mode mode,
27384 tree type,
27385 int *pretend_size,
27386 int second_time ATTRIBUTE_UNUSED)
27388 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27389 int nregs;
27391 cfun->machine->uses_anonymous_args = 1;
27392 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27394 nregs = pcum->aapcs_ncrn;
27395 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
27396 nregs++;
27398 else
27399 nregs = pcum->nregs;
27401 if (nregs < NUM_ARG_REGS)
27402 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27405 /* We can't rely on the caller doing the proper promotion when
27406 using APCS or ATPCS. */
27408 static bool
27409 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27411 return !TARGET_AAPCS_BASED;
27414 static enum machine_mode
27415 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27416 enum machine_mode mode,
27417 int *punsignedp ATTRIBUTE_UNUSED,
27418 const_tree fntype ATTRIBUTE_UNUSED,
27419 int for_return ATTRIBUTE_UNUSED)
27421 if (GET_MODE_CLASS (mode) == MODE_INT
27422 && GET_MODE_SIZE (mode) < 4)
27423 return SImode;
27425 return mode;
27428 /* AAPCS based ABIs use short enums by default. */
27430 static bool
27431 arm_default_short_enums (void)
27433 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
27437 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27439 static bool
27440 arm_align_anon_bitfield (void)
27442 return TARGET_AAPCS_BASED;
27446 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27448 static tree
27449 arm_cxx_guard_type (void)
27451 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27455 /* The EABI says test the least significant bit of a guard variable. */
27457 static bool
27458 arm_cxx_guard_mask_bit (void)
27460 return TARGET_AAPCS_BASED;
27464 /* The EABI specifies that all array cookies are 8 bytes long. */
27466 static tree
27467 arm_get_cookie_size (tree type)
27469 tree size;
27471 if (!TARGET_AAPCS_BASED)
27472 return default_cxx_get_cookie_size (type);
27474 size = build_int_cst (sizetype, 8);
27475 return size;
27479 /* The EABI says that array cookies should also contain the element size. */
27481 static bool
27482 arm_cookie_has_size (void)
27484 return TARGET_AAPCS_BASED;
27488 /* The EABI says constructors and destructors should return a pointer to
27489 the object constructed/destroyed. */
27491 static bool
27492 arm_cxx_cdtor_returns_this (void)
27494 return TARGET_AAPCS_BASED;
27497 /* The EABI says that an inline function may never be the key
27498 method. */
27500 static bool
27501 arm_cxx_key_method_may_be_inline (void)
27503 return !TARGET_AAPCS_BASED;
27506 static void
27507 arm_cxx_determine_class_data_visibility (tree decl)
27509 if (!TARGET_AAPCS_BASED
27510 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27511 return;
27513 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27514 be exported.  However, on systems without dynamic vague linkage,
27515 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27516 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27517 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27518 else
27519 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27520 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27523 static bool
27524 arm_cxx_class_data_always_comdat (void)
27526 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27527 vague linkage if the class has no key function. */
27528 return !TARGET_AAPCS_BASED;
27532 /* The EABI says __aeabi_atexit should be used to register static
27533 destructors. */
27535 static bool
27536 arm_cxx_use_aeabi_atexit (void)
27538 return TARGET_AAPCS_BASED;
27542 void
27543 arm_set_return_address (rtx source, rtx scratch)
27545 arm_stack_offsets *offsets;
27546 HOST_WIDE_INT delta;
27547 rtx addr;
27548 unsigned long saved_regs;
27550 offsets = arm_get_frame_offsets ();
27551 saved_regs = offsets->saved_regs_mask;
27553 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27554 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27555 else
27557 if (frame_pointer_needed)
27558 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27559 else
27561 /* LR will be the first saved register. */
27562 delta = offsets->outgoing_args - (offsets->frame + 4);
27565 if (delta >= 4096)
27567 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27568 GEN_INT (delta & ~4095)));
27569 addr = scratch;
27570 delta &= 4095;
27572 else
27573 addr = stack_pointer_rtx;
27575 addr = plus_constant (Pmode, addr, delta);
27577 emit_move_insn (gen_frame_mem (Pmode, addr), source);
27582 void
27583 thumb_set_return_address (rtx source, rtx scratch)
27585 arm_stack_offsets *offsets;
27586 HOST_WIDE_INT delta;
27587 HOST_WIDE_INT limit;
27588 int reg;
27589 rtx addr;
27590 unsigned long mask;
27592 emit_use (source);
27594 offsets = arm_get_frame_offsets ();
27595 mask = offsets->saved_regs_mask;
27596 if (mask & (1 << LR_REGNUM))
27598 limit = 1024;
27599 /* Find the saved regs. */
27600 if (frame_pointer_needed)
27602 delta = offsets->soft_frame - offsets->saved_args;
27603 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27604 if (TARGET_THUMB1)
27605 limit = 128;
27607 else
27609 delta = offsets->outgoing_args - offsets->saved_args;
27610 reg = SP_REGNUM;
27612 /* Allow for the stack frame. */
27613 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27614 delta -= 16;
27615 /* The link register is always the first saved register. */
27616 delta -= 4;
27618 /* Construct the address. */
27619 addr = gen_rtx_REG (SImode, reg);
27620 if (delta > limit)
27622 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27623 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27624 addr = scratch;
27626 else
27627 addr = plus_constant (Pmode, addr, delta);
27629 emit_move_insn (gen_frame_mem (Pmode, addr), source);
27631 else
27632 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27635 /* Implements target hook vector_mode_supported_p. */
27636 bool
27637 arm_vector_mode_supported_p (enum machine_mode mode)
27639 /* Neon also supports V2SImode, etc. listed in the clause below. */
27640 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27641 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
27642 return true;
27644 if ((TARGET_NEON || TARGET_IWMMXT)
27645 && ((mode == V2SImode)
27646 || (mode == V4HImode)
27647 || (mode == V8QImode)))
27648 return true;
27650 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27651 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27652 || mode == V2HAmode))
27653 return true;
27655 return false;
27658 /* Implements target hook array_mode_supported_p. */
27660 static bool
27661 arm_array_mode_supported_p (enum machine_mode mode,
27662 unsigned HOST_WIDE_INT nelems)
27664 if (TARGET_NEON
27665 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27666 && (nelems >= 2 && nelems <= 4))
27667 return true;
27669 return false;
27672 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27673 registers when autovectorizing for Neon, at least until multiple vector
27674 widths are supported properly by the middle-end. */
27676 static enum machine_mode
27677 arm_preferred_simd_mode (enum machine_mode mode)
27679 if (TARGET_NEON)
27680 switch (mode)
27682 case SFmode:
27683 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27684 case SImode:
27685 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27686 case HImode:
27687 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27688 case QImode:
27689 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27690 case DImode:
27691 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27692 return V2DImode;
27693 break;
27695 default:;
27698 if (TARGET_REALLY_IWMMXT)
27699 switch (mode)
27701 case SImode:
27702 return V2SImode;
27703 case HImode:
27704 return V4HImode;
27705 case QImode:
27706 return V8QImode;
27708 default:;
27711 return word_mode;
27714 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27716 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27717 using r0-r4 for function arguments and r7 for the stack frame, without
27718 enough left over to do doubleword arithmetic.  For Thumb-2 all the
27719 potentially problematic instructions accept high registers so this is not
27720 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27721 that require many low registers. */
27722 static bool
27723 arm_class_likely_spilled_p (reg_class_t rclass)
27725 if ((TARGET_THUMB1 && rclass == LO_REGS)
27726 || rclass == CC_REG)
27727 return true;
27729 return false;
27732 /* Implements target hook small_register_classes_for_mode_p. */
27733 bool
27734 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
27736 return TARGET_THUMB1;
27739 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27740 ARM insns and therefore guarantee that the shift count is modulo 256.
27741 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27742 guarantee no particular behavior for out-of-range counts. */
27744 static unsigned HOST_WIDE_INT
27745 arm_shift_truncation_mask (enum machine_mode mode)
27747 return mode == SImode ? 255 : 0;
27751 /* Map internal gcc register numbers to DWARF2 register numbers. */
27753 unsigned int
27754 arm_dbx_register_number (unsigned int regno)
27756 if (regno < 16)
27757 return regno;
27759 if (IS_VFP_REGNUM (regno))
27761 /* See comment in arm_dwarf_register_span. */
27762 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27763 return 64 + regno - FIRST_VFP_REGNUM;
27764 else
27765 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27768 if (IS_IWMMXT_GR_REGNUM (regno))
27769 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27771 if (IS_IWMMXT_REGNUM (regno))
27772 return 112 + regno - FIRST_IWMMXT_REGNUM;
27774 gcc_unreachable ();
27777 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27778 GCC models them as 64 32-bit registers, so we need to describe this to
27779 the DWARF generation code. Other registers can use the default. */
27780 static rtx
27781 arm_dwarf_register_span (rtx rtl)
27783 unsigned regno;
27784 int nregs;
27785 int i;
27786 rtx p;
27788 regno = REGNO (rtl);
27789 if (!IS_VFP_REGNUM (regno))
27790 return NULL_RTX;
27792 /* XXX FIXME: The EABI defines two VFP register ranges:
27793 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27794 256-287: D0-D31
27795 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27796 corresponding D register. Until GDB supports this, we shall use the
27797 legacy encodings. We also use these encodings for D0-D15 for
27798 compatibility with older debuggers. */
27799 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27800 return NULL_RTX;
27802 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
27803 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
27804 for (i = 0; i < nregs; i++)
27805 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
27807 return p;
27810 #if ARM_UNWIND_INFO
27811 /* Emit unwind directives for a store-multiple instruction or stack pointer
27812 push during alignment.
27813 These should only ever be generated by the function prologue code, so
27814 expect them to have a particular form. */
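/* For instance, a prologue "push {r4, r5, lr}" is described to the
   unwinder as ".save {r4, r5, lr}", and a VFP store-multiple of d8-d9
   as ".vsave {d8, d9}".  */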
27816 static void
27817 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27819 int i;
27820 HOST_WIDE_INT offset;
27821 HOST_WIDE_INT nregs;
27822 int reg_size;
27823 unsigned reg;
27824 unsigned lastreg;
27825 rtx e;
27827 e = XVECEXP (p, 0, 0);
27828 if (GET_CODE (e) != SET)
27829 abort ();
27831 /* First insn will adjust the stack pointer. */
27832 if (GET_CODE (e) != SET
27833 || !REG_P (XEXP (e, 0))
27834 || REGNO (XEXP (e, 0)) != SP_REGNUM
27835 || GET_CODE (XEXP (e, 1)) != PLUS)
27836 abort ();
27838 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
27839 nregs = XVECLEN (p, 0) - 1;
27841 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
27842 if (reg < 16)
27844 /* The function prologue may also push pc, but does not annotate it, as
27845 it is never restored.  We turn this into a stack pointer adjustment.  */
27846 if (nregs * 4 == offset - 4)
27848 fprintf (asm_out_file, "\t.pad #4\n");
27849 offset -= 4;
27851 reg_size = 4;
27852 fprintf (asm_out_file, "\t.save {");
27854 else if (IS_VFP_REGNUM (reg))
27856 reg_size = 8;
27857 fprintf (asm_out_file, "\t.vsave {");
27859 else
27860 /* Unknown register type. */
27861 abort ();
27863 /* If the stack increment doesn't match the size of the saved registers,
27864 something has gone horribly wrong. */
27865 if (offset != nregs * reg_size)
27866 abort ();
27868 offset = 0;
27869 lastreg = 0;
27870 /* The remaining insns will describe the stores. */
27871 for (i = 1; i <= nregs; i++)
27873 /* Expect (set (mem <addr>) (reg)).
27874 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27875 e = XVECEXP (p, 0, i);
27876 if (GET_CODE (e) != SET
27877 || !MEM_P (XEXP (e, 0))
27878 || !REG_P (XEXP (e, 1)))
27879 abort ();
27881 reg = REGNO (XEXP (e, 1));
27882 if (reg < lastreg)
27883 abort ();
27885 if (i != 1)
27886 fprintf (asm_out_file, ", ");
27887 /* We can't use %r for vfp because we need to use the
27888 double precision register names. */
27889 if (IS_VFP_REGNUM (reg))
27890 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27891 else
27892 asm_fprintf (asm_out_file, "%r", reg);
27894 #ifdef ENABLE_CHECKING
27895 /* Check that the addresses are consecutive. */
27896 e = XEXP (XEXP (e, 0), 0);
27897 if (GET_CODE (e) == PLUS)
27899 offset += reg_size;
27900 if (!REG_P (XEXP (e, 0))
27901 || REGNO (XEXP (e, 0)) != SP_REGNUM
27902 || !CONST_INT_P (XEXP (e, 1))
27903 || offset != INTVAL (XEXP (e, 1)))
27904 abort ();
27906 else if (i != 1
27907 || !REG_P (e)
27908 || REGNO (e) != SP_REGNUM)
27909 abort ();
27910 #endif
27912 fprintf (asm_out_file, "}\n");
27915 /* Emit unwind directives for a SET. */
27917 static void
27918 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27920 rtx e0;
27921 rtx e1;
27922 unsigned reg;
27924 e0 = XEXP (p, 0);
27925 e1 = XEXP (p, 1);
27926 switch (GET_CODE (e0))
27928 case MEM:
27929 /* Pushing a single register. */
27930 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27931 || !REG_P (XEXP (XEXP (e0, 0), 0))
27932 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27933 abort ();
27935 asm_fprintf (asm_out_file, "\t.save ");
27936 if (IS_VFP_REGNUM (REGNO (e1)))
27937 asm_fprintf(asm_out_file, "{d%d}\n",
27938 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27939 else
27940 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27941 break;
27943 case REG:
27944 if (REGNO (e0) == SP_REGNUM)
27946 /* A stack increment. */
27947 if (GET_CODE (e1) != PLUS
27948 || !REG_P (XEXP (e1, 0))
27949 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27950 || !CONST_INT_P (XEXP (e1, 1)))
27951 abort ();
27953 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27954 -INTVAL (XEXP (e1, 1)));
27956 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27958 HOST_WIDE_INT offset;
27960 if (GET_CODE (e1) == PLUS)
27962 if (!REG_P (XEXP (e1, 0))
27963 || !CONST_INT_P (XEXP (e1, 1)))
27964 abort ();
27965 reg = REGNO (XEXP (e1, 0));
27966 offset = INTVAL (XEXP (e1, 1));
27967 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27968 HARD_FRAME_POINTER_REGNUM, reg,
27969 offset);
27971 else if (REG_P (e1))
27973 reg = REGNO (e1);
27974 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27975 HARD_FRAME_POINTER_REGNUM, reg);
27977 else
27978 abort ();
27980 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27982 /* Move from sp to reg. */
27983 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27985 else if (GET_CODE (e1) == PLUS
27986 && REG_P (XEXP (e1, 0))
27987 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27988 && CONST_INT_P (XEXP (e1, 1)))
27990 /* Set reg to offset from sp. */
27991 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27992 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27994 else
27995 abort ();
27996 break;
27998 default:
27999 abort ();
28004 /* Emit unwind directives for the given insn. */
28006 static void
28007 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28009 rtx note, pat;
28010 bool handled_one = false;
28012 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28013 return;
28015 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28016 && (TREE_NOTHROW (current_function_decl)
28017 || crtl->all_throwers_are_sibcalls))
28018 return;
28020 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28021 return;
28023 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28025 switch (REG_NOTE_KIND (note))
28027 case REG_FRAME_RELATED_EXPR:
28028 pat = XEXP (note, 0);
28029 goto found;
28031 case REG_CFA_REGISTER:
28032 pat = XEXP (note, 0);
28033 if (pat == NULL)
28035 pat = PATTERN (insn);
28036 if (GET_CODE (pat) == PARALLEL)
28037 pat = XVECEXP (pat, 0, 0);
28040 /* Only emitted for IS_STACKALIGN re-alignment. */
28042 rtx dest, src;
28043 unsigned reg;
28045 src = SET_SRC (pat);
28046 dest = SET_DEST (pat);
28048 gcc_assert (src == stack_pointer_rtx);
28049 reg = REGNO (dest);
28050 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28051 reg + 0x90, reg);
28053 handled_one = true;
28054 break;
28056 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
28057 to get correct dwarf information for shrink-wrap. We should not
28058 emit unwind information for it because these are used either for
28059 pretend arguments or notes to adjust sp and restore registers from
28060 stack. */
28061 case REG_CFA_ADJUST_CFA:
28062 case REG_CFA_RESTORE:
28063 return;
28065 case REG_CFA_DEF_CFA:
28066 case REG_CFA_EXPRESSION:
28067 case REG_CFA_OFFSET:
28068 /* ??? Only handling here what we actually emit. */
28069 gcc_unreachable ();
28071 default:
28072 break;
28075 if (handled_one)
28076 return;
28077 pat = PATTERN (insn);
28078 found:
28080 switch (GET_CODE (pat))
28082 case SET:
28083 arm_unwind_emit_set (asm_out_file, pat);
28084 break;
28086 case SEQUENCE:
28087 /* Store multiple. */
28088 arm_unwind_emit_sequence (asm_out_file, pat);
28089 break;
28091 default:
28092 abort();
28097 /* Output a reference from a function exception table to the type_info
28098 object X. The EABI specifies that the symbol should be relocated by
28099 an R_ARM_TARGET2 relocation. */
28101 static bool
28102 arm_output_ttype (rtx x)
28104 fputs ("\t.word\t", asm_out_file);
28105 output_addr_const (asm_out_file, x);
28106 /* Use special relocations for symbol references. */
28107 if (!CONST_INT_P (x))
28108 fputs ("(TARGET2)", asm_out_file);
28109 fputc ('\n', asm_out_file);
28111 return TRUE;
28114 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28116 static void
28117 arm_asm_emit_except_personality (rtx personality)
28119 fputs ("\t.personality\t", asm_out_file);
28120 output_addr_const (asm_out_file, personality);
28121 fputc ('\n', asm_out_file);
28124 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28126 static void
28127 arm_asm_init_sections (void)
28129 exception_section = get_unnamed_section (0, output_section_asm_op,
28130 "\t.handlerdata");
28132 #endif /* ARM_UNWIND_INFO */
28134 /* Output unwind directives for the start/end of a function. */
28136 void
28137 arm_output_fn_unwind (FILE * f, bool prologue)
28139 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28140 return;
28142 if (prologue)
28143 fputs ("\t.fnstart\n", f);
28144 else
28146 /* If this function will never be unwound, then mark it as such.
28147 The same condition is used in arm_unwind_emit to suppress
28148 the frame annotations. */
28149 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28150 && (TREE_NOTHROW (current_function_decl)
28151 || crtl->all_throwers_are_sibcalls))
28152 fputs("\t.cantunwind\n", f);
28154 fputs ("\t.fnend\n", f);
28158 static bool
28159 arm_emit_tls_decoration (FILE *fp, rtx x)
28161 enum tls_reloc reloc;
28162 rtx val;
28164 val = XVECEXP (x, 0, 0);
28165 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28167 output_addr_const (fp, val);
28169 switch (reloc)
28171 case TLS_GD32:
28172 fputs ("(tlsgd)", fp);
28173 break;
28174 case TLS_LDM32:
28175 fputs ("(tlsldm)", fp);
28176 break;
28177 case TLS_LDO32:
28178 fputs ("(tlsldo)", fp);
28179 break;
28180 case TLS_IE32:
28181 fputs ("(gottpoff)", fp);
28182 break;
28183 case TLS_LE32:
28184 fputs ("(tpoff)", fp);
28185 break;
28186 case TLS_DESCSEQ:
28187 fputs ("(tlsdesc)", fp);
28188 break;
28189 default:
28190 gcc_unreachable ();
28193 switch (reloc)
28195 case TLS_GD32:
28196 case TLS_LDM32:
28197 case TLS_IE32:
28198 case TLS_DESCSEQ:
28199 fputs (" + (. - ", fp);
28200 output_addr_const (fp, XVECEXP (x, 0, 2));
28201 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28202 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28203 output_addr_const (fp, XVECEXP (x, 0, 3));
28204 fputc (')', fp);
28205 break;
28206 default:
28207 break;
28210 return TRUE;
28213 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28215 static void
28216 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28218 gcc_assert (size == 4);
28219 fputs ("\t.word\t", file);
28220 output_addr_const (file, x);
28221 fputs ("(tlsldo)", file);
28224 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28226 static bool
28227 arm_output_addr_const_extra (FILE *fp, rtx x)
28229 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28230 return arm_emit_tls_decoration (fp, x);
28231 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28233 char label[256];
28234 int labelno = INTVAL (XVECEXP (x, 0, 0));
28236 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28237 assemble_name_raw (fp, label);
28239 return TRUE;
28241 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28243 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28244 if (GOT_PCREL)
28245 fputs ("+.", fp);
28246 fputs ("-(", fp);
28247 output_addr_const (fp, XVECEXP (x, 0, 0));
28248 fputc (')', fp);
28249 return TRUE;
28251 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28253 output_addr_const (fp, XVECEXP (x, 0, 0));
28254 if (GOT_PCREL)
28255 fputs ("+.", fp);
28256 fputs ("-(", fp);
28257 output_addr_const (fp, XVECEXP (x, 0, 1));
28258 fputc (')', fp);
28259 return TRUE;
28261 else if (GET_CODE (x) == CONST_VECTOR)
28262 return arm_emit_vector_const (fp, x);
28264 return FALSE;
28267 /* Output assembly for a shift instruction.
28268 SET_FLAGS determines how the instruction modifies the condition codes.
28269 0 - Do not set condition codes.
28270 1 - Set condition codes.
28271 2 - Use smallest instruction. */
28272 const char *
28273 arm_output_shift(rtx * operands, int set_flags)
28275 char pattern[100];
28276 static const char flag_chars[3] = {'?', '.', '!'};
28277 const char *shift;
28278 HOST_WIDE_INT val;
28279 char c;
28281 c = flag_chars[set_flags];
28282 if (TARGET_UNIFIED_ASM)
28284 shift = shift_op(operands[3], &val);
28285 if (shift)
28287 if (val != -1)
28288 operands[2] = GEN_INT(val);
28289 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28291 else
28292 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28294 else
28295 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
28296 output_asm_insn (pattern, operands);
28297 return "";
28300 /* Output assembly for a WMMX immediate shift instruction. */
28301 const char *
28302 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28304 int shift = INTVAL (operands[2]);
28305 char templ[50];
28306 enum machine_mode opmode = GET_MODE (operands[0]);
28308 gcc_assert (shift >= 0);
28310 /* Handle shift values that are out of range for the mode: greater than
28311 63 (for the D qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
28312 if (((opmode == V4HImode) && (shift > 15))
28313 || ((opmode == V2SImode) && (shift > 31))
28314 || ((opmode == DImode) && (shift > 63)))
28316 if (wror_or_wsra)
28318 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28319 output_asm_insn (templ, operands);
28320 if (opmode == DImode)
28322 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28323 output_asm_insn (templ, operands);
28326 else
28328 /* The destination register will contain all zeros. */
28329 sprintf (templ, "wzero\t%%0");
28330 output_asm_insn (templ, operands);
28332 return "";
28335 if ((opmode == DImode) && (shift > 32))
28337 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28338 output_asm_insn (templ, operands);
28339 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28340 output_asm_insn (templ, operands);
28342 else
28344 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28345 output_asm_insn (templ, operands);
28347 return "";
28350 /* Output assembly for a WMMX tinsr instruction. */
28351 const char *
28352 arm_output_iwmmxt_tinsr (rtx *operands)
28354 int mask = INTVAL (operands[3]);
28355 int i;
28356 char templ[50];
28357 int units = mode_nunits[GET_MODE (operands[0])];
28358 gcc_assert ((mask & (mask - 1)) == 0);
28359 for (i = 0; i < units; ++i)
28361 if ((mask & 0x01) == 1)
28363 break;
28365 mask >>= 1;
28367 gcc_assert (i < units);
28369 switch (GET_MODE (operands[0]))
28371 case V8QImode:
28372 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28373 break;
28374 case V4HImode:
28375 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28376 break;
28377 case V2SImode:
28378 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28379 break;
28380 default:
28381 gcc_unreachable ();
28382 break;
28384 output_asm_insn (templ, operands);
28386 return "";
28389 /* Output a Thumb-1 casesi dispatch sequence. */
28390 const char *
28391 thumb1_output_casesi (rtx *operands)
28393 rtx diff_vec = PATTERN (next_active_insn (operands[0]));
28395 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28397 switch (GET_MODE(diff_vec))
28399 case QImode:
28400 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28401 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28402 case HImode:
28403 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28404 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28405 case SImode:
28406 return "bl\t%___gnu_thumb1_case_si";
28407 default:
28408 gcc_unreachable ();
28412 /* Output a Thumb-2 casesi instruction. */
28413 const char *
28414 thumb2_output_casesi (rtx *operands)
28416 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
28418 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28420 output_asm_insn ("cmp\t%0, %1", operands);
28421 output_asm_insn ("bhi\t%l3", operands);
28422 switch (GET_MODE(diff_vec))
28424 case QImode:
28425 return "tbb\t[%|pc, %0]";
28426 case HImode:
28427 return "tbh\t[%|pc, %0, lsl #1]";
28428 case SImode:
28429 if (flag_pic)
28431 output_asm_insn ("adr\t%4, %l2", operands);
28432 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28433 output_asm_insn ("add\t%4, %4, %5", operands);
28434 return "bx\t%4";
28436 else
28438 output_asm_insn ("adr\t%4, %l2", operands);
28439 return "ldr\t%|pc, [%4, %0, lsl #2]";
28441 default:
28442 gcc_unreachable ();
28446 /* Most ARM cores are single issue, but some newer ones can dual issue.
28447 The scheduler descriptions rely on this being correct. */
28448 static int
28449 arm_issue_rate (void)
28451 switch (arm_tune)
28453 case cortexa15:
28454 return 3;
28456 case cortexr4:
28457 case cortexr4f:
28458 case cortexr5:
28459 case genericv7a:
28460 case cortexa5:
28461 case cortexa7:
28462 case cortexa8:
28463 case cortexa9:
28464 case cortexa53:
28465 case fa726te:
28466 case marvell_pj4:
28467 return 2;
28469 default:
28470 return 1;
28474 /* A table and a function to perform ARM-specific name mangling for
28475 NEON vector types in order to conform to the AAPCS (see "Procedure
28476 Call Standard for the ARM Architecture", Appendix A). To qualify
28477 for emission with the mangled names defined in that document, a
28478 vector type must not only be of the correct mode but also be
28479 composed of NEON vector element types (e.g. __builtin_neon_qi). */
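/* For example, the entry below for V4SImode with element type
   __builtin_neon_si (the element type used for int32x4_t) yields the
   mangled name "17__simd128_int32_t".  */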
28480 typedef struct
28482 enum machine_mode mode;
28483 const char *element_type_name;
28484 const char *aapcs_name;
28485 } arm_mangle_map_entry;
28487 static arm_mangle_map_entry arm_mangle_map[] = {
28488 /* 64-bit containerized types. */
28489 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
28490 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
28491 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
28492 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
28493 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
28494 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
28495 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
28496 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
28497 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
28498 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
28499 /* 128-bit containerized types. */
28500 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
28501 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
28502 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
28503 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
28504 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
28505 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
28506 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
28507 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
28508 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
28509 { VOIDmode, NULL, NULL }
28512 const char *
28513 arm_mangle_type (const_tree type)
28515 arm_mangle_map_entry *pos = arm_mangle_map;
28517 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28518 has to be mangled as if it is in the "std" namespace. */
28519 if (TARGET_AAPCS_BASED
28520 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28521 return "St9__va_list";
28523 /* Half-precision float. */
28524 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28525 return "Dh";
28527 if (TREE_CODE (type) != VECTOR_TYPE)
28528 return NULL;
28530 /* Check the mode of the vector type, and the name of the vector
28531 element type, against the table. */
28532 while (pos->mode != VOIDmode)
28534 tree elt_type = TREE_TYPE (type);
28536 if (pos->mode == TYPE_MODE (type)
28537 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
28538 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
28539 pos->element_type_name))
28540 return pos->aapcs_name;
28542 pos++;
28545 /* Use the default mangling for unrecognized (possibly user-defined)
28546 vector types. */
28547 return NULL;
28550 /* Order of allocation of core registers for Thumb: this allocation is
28551 written over the corresponding initial entries of the array
28552 initialized with REG_ALLOC_ORDER. We allocate all low registers
28553 first. Saving and restoring a low register is usually cheaper than
28554 using a call-clobbered high register. */
28556 static const int thumb_core_reg_alloc_order[] =
28558 3, 2, 1, 0, 4, 5, 6, 7,
28559 14, 12, 8, 9, 10, 11
28562 /* Adjust register allocation order when compiling for Thumb. */
28564 void
28565 arm_order_regs_for_local_alloc (void)
28567 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28568 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28569 if (TARGET_THUMB)
28570 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28571 sizeof (thumb_core_reg_alloc_order));
28574 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28576 bool
28577 arm_frame_pointer_required (void)
28579 return (cfun->has_nonlocal_label
28580 || SUBTARGET_FRAME_POINTER_REQUIRED
28581 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
28584 /* Thumb-1 is the only target that lacks conditional execution, so return
28585 true if the target is not Thumb-1. */
28586 static bool
28587 arm_have_conditional_execution (void)
28589 return !TARGET_THUMB1;
28592 tree
28593 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
28595 enum machine_mode in_mode, out_mode;
28596 int in_n, out_n;
28598 if (TREE_CODE (type_out) != VECTOR_TYPE
28599 || TREE_CODE (type_in) != VECTOR_TYPE
28600 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
28601 return NULL_TREE;
28603 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28604 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28605 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28606 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28608 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
28609 decl of the vectorized builtin for the appropriate vector mode.
28610 NULL_TREE is returned if no such builtin is available. */
28611 #undef ARM_CHECK_BUILTIN_MODE
28612 #define ARM_CHECK_BUILTIN_MODE(C) \
28613 (out_mode == SFmode && out_n == C \
28614 && in_mode == SFmode && in_n == C)
28616 #undef ARM_FIND_VRINT_VARIANT
28617 #define ARM_FIND_VRINT_VARIANT(N) \
28618 (ARM_CHECK_BUILTIN_MODE (2) \
28619 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
28620 : (ARM_CHECK_BUILTIN_MODE (4) \
28621 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
28622 : NULL_TREE))
28624 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
28626 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28627 switch (fn)
28629 case BUILT_IN_FLOORF:
28630 return ARM_FIND_VRINT_VARIANT (vrintm);
28631 case BUILT_IN_CEILF:
28632 return ARM_FIND_VRINT_VARIANT (vrintp);
28633 case BUILT_IN_TRUNCF:
28634 return ARM_FIND_VRINT_VARIANT (vrintz);
28635 case BUILT_IN_ROUNDF:
28636 return ARM_FIND_VRINT_VARIANT (vrinta);
28637 default:
28638 return NULL_TREE;
28641 return NULL_TREE;
28643 #undef ARM_CHECK_BUILTIN_MODE
28644 #undef ARM_FIND_VRINT_VARIANT
28646 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28647 static HOST_WIDE_INT
28648 arm_vector_alignment (const_tree type)
28650 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
28652 if (TARGET_AAPCS_BASED)
28653 align = MIN (align, 64);
28655 return align;
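/* Return the set of vector sizes, in bytes, that the autovectorizer may
   try: both 16 and 8 for Neon, or 0 (meaning use only the preferred SIMD
   mode) when -mvectorize-with-neon-double restricts us to double-word
   registers.  */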
28658 static unsigned int
28659 arm_autovectorize_vector_sizes (void)
28661 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28664 static bool
28665 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28667 /* Vectors which aren't in packed structures will not be less aligned than
28668 the natural alignment of their element type, so this is safe. */
28669 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
28670 return !is_packed;
28672 return default_builtin_vector_alignment_reachable (type, is_packed);
28675 static bool
28676 arm_builtin_support_vector_misalignment (enum machine_mode mode,
28677 const_tree type, int misalignment,
28678 bool is_packed)
28680 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
28682 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28684 if (is_packed)
28685 return align == 1;
28687 /* If the misalignment is unknown, we should be able to handle the access
28688 so long as it is not to a member of a packed data structure. */
28689 if (misalignment == -1)
28690 return true;
28692 /* Return true if the misalignment is a multiple of the natural alignment
28693 of the vector's element type. This is probably always going to be
28694 true in practice, since we've already established that this isn't a
28695 packed access. */
28696 return ((misalignment % align) == 0);
28699 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28700 is_packed);
28703 static void
28704 arm_conditional_register_usage (void)
28706 int regno;
28708 if (TARGET_THUMB1 && optimize_size)
28710 /* When optimizing for size on Thumb-1, it's better not
28711 to use the HI regs, because of the overhead of
28712 stacking them. */
28713 for (regno = FIRST_HI_REGNUM;
28714 regno <= LAST_HI_REGNUM; ++regno)
28715 fixed_regs[regno] = call_used_regs[regno] = 1;
28718 /* The link register can be clobbered by any branch insn,
28719 but we have no way to track that at present, so mark
28720 it as unavailable. */
28721 if (TARGET_THUMB1)
28722 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28724 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
28726 /* VFPv3 registers are disabled when earlier VFP
28727 versions are selected due to the definition of
28728 LAST_VFP_REGNUM. */
28729 for (regno = FIRST_VFP_REGNUM;
28730 regno <= LAST_VFP_REGNUM; ++ regno)
28732 fixed_regs[regno] = 0;
28733 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28734 || regno >= FIRST_VFP_REGNUM + 32;
28738 if (TARGET_REALLY_IWMMXT)
28740 regno = FIRST_IWMMXT_GR_REGNUM;
28741 /* The 2002/10/09 revision of the XScale ABI has wCG0
28742 and wCG1 as call-preserved registers. The 2002/11/21
28743 revision changed this so that all wCG registers are
28744 scratch registers. */
28745 for (regno = FIRST_IWMMXT_GR_REGNUM;
28746 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28747 fixed_regs[regno] = 0;
28748 /* The XScale ABI has wR0 - wR9 as scratch registers,
28749 the rest as call-preserved registers. */
28750 for (regno = FIRST_IWMMXT_REGNUM;
28751 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28753 fixed_regs[regno] = 0;
28754 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28758 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28760 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28761 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28763 else if (TARGET_APCS_STACK)
28765 fixed_regs[10] = 1;
28766 call_used_regs[10] = 1;
28768 /* -mcaller-super-interworking reserves r11 for calls to
28769 _interwork_r11_call_via_rN(). Making the register global
28770 is an easy way of ensuring that it remains valid for all
28771 calls. */
28772 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28773 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28775 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28776 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28777 if (TARGET_CALLER_INTERWORKING)
28778 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28780 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28783 static reg_class_t
28784 arm_preferred_rename_class (reg_class_t rclass)
28786 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28787 using GENERAL_REGS.  During the register-renaming pass we prefer LO_REGS,
28788 and code size can be reduced. */
28789 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28790 return LO_REGS;
28791 else
28792 return NO_REGS;
28795 /* Compute the attribute "length" of insn "*push_multi".
28796 So this function MUST be kept in sync with that insn pattern. */
28797 int
28798 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
28800 int i, regno, hi_reg;
28801 int num_saves = XVECLEN (parallel_op, 0);
28803 /* ARM mode. */
28804 if (TARGET_ARM)
28805 return 4;
28806 /* Thumb1 mode. */
28807 if (TARGET_THUMB1)
28808 return 2;
28810 /* Thumb2 mode. */
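  /* A Thumb-2 push whose register list contains only low registers
     (LR is also allowed) can use the 16-bit encoding and so is 2 bytes
     long; any other high register forces the 32-bit encoding, giving a
     length of 4.  */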
28811 regno = REGNO (first_op);
28812 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28813 for (i = 1; i < num_saves && !hi_reg; i++)
28815 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28816 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28819 if (!hi_reg)
28820 return 2;
28821 return 4;
28824 /* Compute the number of instructions emitted by output_move_double. */
28825 int
28826 arm_count_output_move_double_insns (rtx *operands)
28828 int count;
28829 rtx ops[2];
28830 /* output_move_double may modify the operands array, so call it
28831 here on a copy of the array. */
28832 ops[0] = operands[0];
28833 ops[1] = operands[1];
28834 output_move_double (ops, false, &count);
28835 return count;
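/* If OPERAND is the exact reciprocal of a power of two, return the
   corresponding number of fractional bits; otherwise return 0.  For
   example, a CONST_DOUBLE of 0.25 yields 2, since 1/0.25 == 1 << 2.  */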
28838 int
28839 vfp3_const_double_for_fract_bits (rtx operand)
28841 REAL_VALUE_TYPE r0;
28843 if (!CONST_DOUBLE_P (operand))
28844 return 0;
28846 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
28847 if (exact_real_inverse (DFmode, &r0))
28849 if (exact_real_truncate (DFmode, &r0))
28851 HOST_WIDE_INT value = real_to_integer (&r0);
28852 value = value & 0xffffffff;
28853 if ((value != 0) && ( (value & (value - 1)) == 0))
28854 return int_log2 (value);
28857 return 0;
28860 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28862 static void
28863 arm_pre_atomic_barrier (enum memmodel model)
28865 if (need_atomic_barrier_p (model, true))
28866 emit_insn (gen_memory_barrier ());
28869 static void
28870 arm_post_atomic_barrier (enum memmodel model)
28872 if (need_atomic_barrier_p (model, false))
28873 emit_insn (gen_memory_barrier ());
28876 /* Emit the load-exclusive and store-exclusive instructions.
28877 Use acquire and release versions if necessary. */
28879 static void
28880 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
28882 rtx (*gen) (rtx, rtx);
28884 if (acq)
28886 switch (mode)
28888 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28889 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28890 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28891 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28892 default:
28893 gcc_unreachable ();
28896 else
28898 switch (mode)
28900 case QImode: gen = gen_arm_load_exclusiveqi; break;
28901 case HImode: gen = gen_arm_load_exclusivehi; break;
28902 case SImode: gen = gen_arm_load_exclusivesi; break;
28903 case DImode: gen = gen_arm_load_exclusivedi; break;
28904 default:
28905 gcc_unreachable ();
28909 emit_insn (gen (rval, mem));
28912 static void
28913 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
28914 rtx mem, bool rel)
28916 rtx (*gen) (rtx, rtx, rtx);
28918 if (rel)
28920 switch (mode)
28922 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28923 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28924 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28925 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28926 default:
28927 gcc_unreachable ();
28930 else
28932 switch (mode)
28934 case QImode: gen = gen_arm_store_exclusiveqi; break;
28935 case HImode: gen = gen_arm_store_exclusivehi; break;
28936 case SImode: gen = gen_arm_store_exclusivesi; break;
28937 case DImode: gen = gen_arm_store_exclusivedi; break;
28938 default:
28939 gcc_unreachable ();
28943 emit_insn (gen (bval, rval, mem));
28946 /* Mark the previous jump instruction as unlikely. */
28948 static void
28949 emit_unlikely_jump (rtx insn)
28951 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
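/* REG_BR_PROB_BASE is 10000, so this asks for a taken probability of just
   under 1% for the branch. */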
28953 insn = emit_jump_insn (insn);
28954 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
28957 /* Expand a compare and swap pattern. */
28959 void
28960 arm_expand_compare_and_swap (rtx operands[])
28962 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28963 enum machine_mode mode;
28964 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28966 bval = operands[0];
28967 rval = operands[1];
28968 mem = operands[2];
28969 oldval = operands[3];
28970 newval = operands[4];
28971 is_weak = operands[5];
28972 mod_s = operands[6];
28973 mod_f = operands[7];
28974 mode = GET_MODE (mem);
28976 /* Normally the succ memory model must be stronger than fail, but in the
28977 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28978 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28980 if (TARGET_HAVE_LDACQ
28981 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
28982 && INTVAL (mod_s) == MEMMODEL_RELEASE)
28983 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28985 switch (mode)
28987 case QImode:
28988 case HImode:
28989 /* For narrow modes, we're going to perform the comparison in SImode,
28990 so do the zero-extension now. */
28991 rval = gen_reg_rtx (SImode);
28992 oldval = convert_modes (SImode, mode, oldval, true);
28993 /* FALLTHRU */
28995 case SImode:
28996 /* Force the value into a register if needed. We waited until after
28997 the zero-extension above to do this properly. */
28998 if (!arm_add_operand (oldval, SImode))
28999 oldval = force_reg (SImode, oldval);
29000 break;
29002 case DImode:
29003 if (!cmpdi_operand (oldval, mode))
29004 oldval = force_reg (mode, oldval);
29005 break;
29007 default:
29008 gcc_unreachable ();
29011 switch (mode)
29013 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29014 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29015 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29016 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29017 default:
29018 gcc_unreachable ();
29021 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29023 if (mode == QImode || mode == HImode)
29024 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29026 /* In all cases, we arrange for success to be signaled by Z set.
29027 This arrangement allows for the boolean result to be used directly
29028 in a subsequent branch, post optimization. */
29029 x = gen_rtx_REG (CCmode, CC_REGNUM);
29030 x = gen_rtx_EQ (SImode, x, const0_rtx);
29031 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29034 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29035 another memory store between the load-exclusive and store-exclusive can
29036 reset the monitor from Exclusive to Open state. This means we must wait
29037 until after reload to split the pattern, lest we get a register spill in
29038 the middle of the atomic sequence. */
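/* Roughly, for a strong SImode compare-and-swap the split produces:

       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     1b
       2:

   with Z set on success for the caller to test, and with LDAEX/STLEX used
   instead of LDREX/STREX when the memory model requires acquire or release
   semantics. */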
29040 void
29041 arm_split_compare_and_swap (rtx operands[])
29043 rtx rval, mem, oldval, newval, scratch;
29044 enum machine_mode mode;
29045 enum memmodel mod_s, mod_f;
29046 bool is_weak;
29047 rtx label1, label2, x, cond;
29049 rval = operands[0];
29050 mem = operands[1];
29051 oldval = operands[2];
29052 newval = operands[3];
29053 is_weak = (operands[4] != const0_rtx);
29054 mod_s = (enum memmodel) INTVAL (operands[5]);
29055 mod_f = (enum memmodel) INTVAL (operands[6]);
29056 scratch = operands[7];
29057 mode = GET_MODE (mem);
29059 bool use_acquire = TARGET_HAVE_LDACQ
29060 && !(mod_s == MEMMODEL_RELAXED
29061 || mod_s == MEMMODEL_CONSUME
29062 || mod_s == MEMMODEL_RELEASE);
29064 bool use_release = TARGET_HAVE_LDACQ
29065 && !(mod_s == MEMMODEL_RELAXED
29066 || mod_s == MEMMODEL_CONSUME
29067 || mod_s == MEMMODEL_ACQUIRE);
29069 /* Checks whether a barrier is needed and emits one accordingly. */
29070 if (!(use_acquire || use_release))
29071 arm_pre_atomic_barrier (mod_s);
29073 label1 = NULL_RTX;
29074 if (!is_weak)
29076 label1 = gen_label_rtx ();
29077 emit_label (label1);
29079 label2 = gen_label_rtx ();
29081 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29083 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29084 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29085 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29086 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29087 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29089 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29091 /* Weak or strong, we want EQ to be true for success, so that we
29092 match the flags that we got from the compare above. */
29093 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29094 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29095 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29097 if (!is_weak)
29099 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29100 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29101 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29102 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29105 if (mod_f != MEMMODEL_RELAXED)
29106 emit_label (label2);
29108 /* Checks whether a barrier is needed and emits one accordingly. */
29109 if (!(use_acquire || use_release))
29110 arm_post_atomic_barrier (mod_s);
29112 if (mod_f == MEMMODEL_RELAXED)
29113 emit_label (label2);
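/* Split an atomic read-modify-write of MEM with operation CODE into a
   load-exclusive / operate / store-exclusive retry loop, roughly:

       1:  ldrex   old_out, [mem]
           <new_out = old_out CODE value>
           strex   cond, new_out, [mem]
           cmp     cond, #0
           bne     1b

   OLD_OUT and NEW_OUT may be NULL if the old or new value is not needed;
   barriers or acquire/release accesses are added as MODEL_RTX requires. */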
29116 void
29117 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29118 rtx value, rtx model_rtx, rtx cond)
29120 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
29121 enum machine_mode mode = GET_MODE (mem);
29122 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
29123 rtx label, x;
29125 bool use_acquire = TARGET_HAVE_LDACQ
29126 && !(model == MEMMODEL_RELAXED
29127 || model == MEMMODEL_CONSUME
29128 || model == MEMMODEL_RELEASE);
29130 bool use_release = TARGET_HAVE_LDACQ
29131 && !(model == MEMMODEL_RELAXED
29132 || model == MEMMODEL_CONSUME
29133 || model == MEMMODEL_ACQUIRE);
29135 /* Checks whether a barrier is needed and emits one accordingly. */
29136 if (!(use_acquire || use_release))
29137 arm_pre_atomic_barrier (model);
29139 label = gen_label_rtx ();
29140 emit_label (label);
29142 if (new_out)
29143 new_out = gen_lowpart (wmode, new_out);
29144 if (old_out)
29145 old_out = gen_lowpart (wmode, old_out);
29146 else
29147 old_out = new_out;
29148 value = simplify_gen_subreg (wmode, value, mode, 0);
29150 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29152 switch (code)
29154 case SET:
29155 new_out = value;
29156 break;
29158 case NOT:
29159 x = gen_rtx_AND (wmode, old_out, value);
29160 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29161 x = gen_rtx_NOT (wmode, new_out);
29162 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29163 break;
29165 case MINUS:
29166 if (CONST_INT_P (value))
29168 value = GEN_INT (-INTVAL (value));
29169 code = PLUS;
29171 /* FALLTHRU */
29173 case PLUS:
29174 if (mode == DImode)
29176 /* DImode plus/minus need to clobber flags. */
29177 /* The adddi3 and subdi3 patterns are incorrectly written so that
29178 they require matching operands, even when we could easily support
29179 three operands. Thankfully, this can be fixed up post-splitting,
29180 as the individual add+adc patterns do accept three operands and
29181 post-reload cprop can make these moves go away. */
29182 emit_move_insn (new_out, old_out);
29183 if (code == PLUS)
29184 x = gen_adddi3 (new_out, new_out, value);
29185 else
29186 x = gen_subdi3 (new_out, new_out, value);
29187 emit_insn (x);
29188 break;
29190 /* FALLTHRU */
29192 default:
29193 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29194 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29195 break;
29198 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29199 use_release);
29201 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29202 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29204 /* Checks whether a barrier is needed and emits one accordingly. */
29205 if (!(use_acquire || use_release))
29206 arm_post_atomic_barrier (model);
29209 #define MAX_VECT_LEN 16
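/* Description of a constant vector permutation: the destination and two
   source operands, the selector indices, the vector mode, the number of
   elements, whether both inputs are the same vector, and whether we are
   only testing if the permutation can be matched. */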
29211 struct expand_vec_perm_d
29213 rtx target, op0, op1;
29214 unsigned char perm[MAX_VECT_LEN];
29215 enum machine_mode vmode;
29216 unsigned char nelt;
29217 bool one_vector_p;
29218 bool testing_p;
29221 /* Generate a variable permutation. */
29223 static void
29224 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29226 enum machine_mode vmode = GET_MODE (target);
29227 bool one_vector_p = rtx_equal_p (op0, op1);
29229 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29230 gcc_checking_assert (GET_MODE (op0) == vmode);
29231 gcc_checking_assert (GET_MODE (op1) == vmode);
29232 gcc_checking_assert (GET_MODE (sel) == vmode);
29233 gcc_checking_assert (TARGET_NEON);
29235 if (one_vector_p)
29237 if (vmode == V8QImode)
29238 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29239 else
29240 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29242 else
29244 rtx pair;
29246 if (vmode == V8QImode)
29248 pair = gen_reg_rtx (V16QImode);
29249 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29250 pair = gen_lowpart (TImode, pair);
29251 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29253 else
29255 pair = gen_reg_rtx (OImode);
29256 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29257 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29262 void
29263 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29265 enum machine_mode vmode = GET_MODE (target);
29266 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
29267 bool one_vector_p = rtx_equal_p (op0, op1);
29268 rtx rmask[MAX_VECT_LEN], mask;
29270 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29271 numbering of elements for big-endian, we must reverse the order. */
29272 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29274 /* The VTBL instruction does not use a modulo index, so we must take care
29275 of that ourselves. */
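/* For example, with V8QImode and a single input vector the mask is 7, so an
   out-of-range selector byte such as 9 is reduced to 1, giving the modulo
   semantics VEC_PERM_EXPR expects (VTBL would instead write zero for an
   out-of-range index). */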
29276 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29277 for (i = 0; i < nelt; ++i)
29278 rmask[i] = mask;
29279 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
29280 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29282 arm_expand_vec_perm_1 (target, op0, op1, sel);
29285 /* Generate or test for an insn that supports a constant permutation. */
29287 /* Recognize patterns for the VUZP insns. */
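/* For example, on V8HImode a selector of { 0, 2, 4, 6, 8, 10, 12, 14 } picks
   the even-numbered elements of the two concatenated inputs and
   { 1, 3, 5, 7, 9, 11, 13, 15 } picks the odd-numbered ones; each
   corresponds to one output of a VUZP instruction. */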
29289 static bool
29290 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29292 unsigned int i, odd, mask, nelt = d->nelt;
29293 rtx out0, out1, in0, in1, x;
29294 rtx (*gen)(rtx, rtx, rtx, rtx);
29296 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29297 return false;
29299 /* Note that these are little-endian tests. Adjust for big-endian later. */
29300 if (d->perm[0] == 0)
29301 odd = 0;
29302 else if (d->perm[0] == 1)
29303 odd = 1;
29304 else
29305 return false;
29306 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29308 for (i = 0; i < nelt; i++)
29310 unsigned elt = (i * 2 + odd) & mask;
29311 if (d->perm[i] != elt)
29312 return false;
29315 /* Success! */
29316 if (d->testing_p)
29317 return true;
29319 switch (d->vmode)
29321 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29322 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29323 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29324 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29325 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29326 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29327 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29328 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29329 default:
29330 gcc_unreachable ();
29333 in0 = d->op0;
29334 in1 = d->op1;
29335 if (BYTES_BIG_ENDIAN)
29337 x = in0, in0 = in1, in1 = x;
29338 odd = !odd;
29341 out0 = d->target;
29342 out1 = gen_reg_rtx (d->vmode);
29343 if (odd)
29344 x = out0, out0 = out1, out1 = x;
29346 emit_insn (gen (out0, in0, in1, out1));
29347 return true;
29350 /* Recognize patterns for the VZIP insns. */
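/* For example, on V4SImode a selector of { 0, 4, 1, 5 } interleaves the low
   halves of the two inputs and { 2, 6, 3, 7 } interleaves the high halves;
   each corresponds to one output of a VZIP instruction. */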
29352 static bool
29353 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29355 unsigned int i, high, mask, nelt = d->nelt;
29356 rtx out0, out1, in0, in1, x;
29357 rtx (*gen)(rtx, rtx, rtx, rtx);
29359 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29360 return false;
29362 /* Note that these are little-endian tests. Adjust for big-endian later. */
29363 high = nelt / 2;
29364 if (d->perm[0] == high)
29366 else if (d->perm[0] == 0)
29367 high = 0;
29368 else
29369 return false;
29370 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29372 for (i = 0; i < nelt / 2; i++)
29374 unsigned elt = (i + high) & mask;
29375 if (d->perm[i * 2] != elt)
29376 return false;
29377 elt = (elt + nelt) & mask;
29378 if (d->perm[i * 2 + 1] != elt)
29379 return false;
29382 /* Success! */
29383 if (d->testing_p)
29384 return true;
29386 switch (d->vmode)
29388 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29389 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29390 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29391 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29392 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
29393 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
29394 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29395 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29396 default:
29397 gcc_unreachable ();
29400 in0 = d->op0;
29401 in1 = d->op1;
29402 if (BYTES_BIG_ENDIAN)
29404 x = in0, in0 = in1, in1 = x;
29405 high = !high;
29408 out0 = d->target;
29409 out1 = gen_reg_rtx (d->vmode);
29410 if (high)
29411 x = out0, out0 = out1, out1 = x;
29413 emit_insn (gen (out0, in0, in1, out1));
29414 return true;
29417 /* Recognize patterns for the VREV insns. */
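/* For example, on V8QImode a selector of { 7, 6, 5, 4, 3, 2, 1, 0 }
   (diff == 7) reverses the bytes within the 64-bit doubleword and maps to
   VREV64.8, while { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) reverses the bytes
   within each 32-bit word and maps to VREV32.8. */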
29419 static bool
29420 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29422 unsigned int i, j, diff, nelt = d->nelt;
29423 rtx (*gen)(rtx, rtx, rtx);
29425 if (!d->one_vector_p)
29426 return false;
29428 diff = d->perm[0];
29429 switch (diff)
29431 case 7:
29432 switch (d->vmode)
29434 case V16QImode: gen = gen_neon_vrev64v16qi; break;
29435 case V8QImode: gen = gen_neon_vrev64v8qi; break;
29436 default:
29437 return false;
29439 break;
29440 case 3:
29441 switch (d->vmode)
29443 case V16QImode: gen = gen_neon_vrev32v16qi; break;
29444 case V8QImode: gen = gen_neon_vrev32v8qi; break;
29445 case V8HImode: gen = gen_neon_vrev64v8hi; break;
29446 case V4HImode: gen = gen_neon_vrev64v4hi; break;
29447 default:
29448 return false;
29450 break;
29451 case 1:
29452 switch (d->vmode)
29454 case V16QImode: gen = gen_neon_vrev16v16qi; break;
29455 case V8QImode: gen = gen_neon_vrev16v8qi; break;
29456 case V8HImode: gen = gen_neon_vrev32v8hi; break;
29457 case V4HImode: gen = gen_neon_vrev32v4hi; break;
29458 case V4SImode: gen = gen_neon_vrev64v4si; break;
29459 case V2SImode: gen = gen_neon_vrev64v2si; break;
29460 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
29461 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
29462 default:
29463 return false;
29465 break;
29466 default:
29467 return false;
29470 for (i = 0; i < nelt ; i += diff + 1)
29471 for (j = 0; j <= diff; j += 1)
29473 /* This is guaranteed to be true as the value of diff
29474 is 7, 3 or 1, and we should have enough elements in the
29475 queue to generate this. Getting a vector mask with a
29476 value of diff other than these values implies that
29477 something is wrong by the time we get here. */
29478 gcc_assert (i + j < nelt);
29479 if (d->perm[i + j] != i + diff - j)
29480 return false;
29483 /* Success! */
29484 if (d->testing_p)
29485 return true;
29487 /* ??? The third operand is an artifact of the builtin infrastructure
29488 and is ignored by the actual instruction. */
29489 emit_insn (gen (d->target, d->op0, const0_rtx));
29490 return true;
29493 /* Recognize patterns for the VTRN insns. */
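/* For example, on V4SImode a selector of { 0, 4, 2, 6 } (or { 1, 5, 3, 7 })
   picks matching even- (or odd-) numbered elements from the two inputs,
   which corresponds to one output of a VTRN instruction. */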
29495 static bool
29496 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29498 unsigned int i, odd, mask, nelt = d->nelt;
29499 rtx out0, out1, in0, in1, x;
29500 rtx (*gen)(rtx, rtx, rtx, rtx);
29502 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29503 return false;
29505 /* Note that these are little-endian tests. Adjust for big-endian later. */
29506 if (d->perm[0] == 0)
29507 odd = 0;
29508 else if (d->perm[0] == 1)
29509 odd = 1;
29510 else
29511 return false;
29512 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29514 for (i = 0; i < nelt; i += 2)
29516 if (d->perm[i] != i + odd)
29517 return false;
29518 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29519 return false;
29522 /* Success! */
29523 if (d->testing_p)
29524 return true;
29526 switch (d->vmode)
29528 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29529 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29530 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29531 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29532 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29533 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29534 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29535 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29536 default:
29537 gcc_unreachable ();
29540 in0 = d->op0;
29541 in1 = d->op1;
29542 if (BYTES_BIG_ENDIAN)
29544 x = in0, in0 = in1, in1 = x;
29545 odd = !odd;
29548 out0 = d->target;
29549 out1 = gen_reg_rtx (d->vmode);
29550 if (odd)
29551 x = out0, out0 = out1, out1 = x;
29553 emit_insn (gen (out0, in0, in1, out1));
29554 return true;
29557 /* Recognize patterns for the VEXT insns. */
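/* For example, on V4SImode a selector of { 1, 2, 3, 4 } extracts a window
   starting one element into the concatenation of the two inputs, which is
   VEXT with #1; for a single input, { 1, 2, 3, 0 } is the corresponding
   rotation. */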
29559 static bool
29560 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29562 unsigned int i, nelt = d->nelt;
29563 rtx (*gen) (rtx, rtx, rtx, rtx);
29564 rtx offset;
29566 unsigned int location;
29568 unsigned int next = d->perm[0] + 1;
29570 /* TODO: Handle GCC's numbering of elements for big-endian. */
29571 if (BYTES_BIG_ENDIAN)
29572 return false;
29574 /* Check if the extracted indexes are increasing by one. */
29575 for (i = 1; i < nelt; next++, i++)
29577 /* If we hit the most significant element of the 2nd vector in
29578 the previous iteration, no need to test further. */
29579 if (next == 2 * nelt)
29580 return false;
29582 /* If we are operating on only one vector, it could be a
29583 rotation. If there are only two elements of size < 64, let
29584 arm_evpc_neon_vrev catch it. */
29585 if (d->one_vector_p && (next == nelt))
29587 if ((nelt == 2) && (d->vmode != V2DImode))
29588 return false;
29589 else
29590 next = 0;
29593 if (d->perm[i] != next)
29594 return false;
29597 location = d->perm[0];
29599 switch (d->vmode)
29601 case V16QImode: gen = gen_neon_vextv16qi; break;
29602 case V8QImode: gen = gen_neon_vextv8qi; break;
29603 case V4HImode: gen = gen_neon_vextv4hi; break;
29604 case V8HImode: gen = gen_neon_vextv8hi; break;
29605 case V2SImode: gen = gen_neon_vextv2si; break;
29606 case V4SImode: gen = gen_neon_vextv4si; break;
29607 case V2SFmode: gen = gen_neon_vextv2sf; break;
29608 case V4SFmode: gen = gen_neon_vextv4sf; break;
29609 case V2DImode: gen = gen_neon_vextv2di; break;
29610 default:
29611 return false;
29614 /* Success! */
29615 if (d->testing_p)
29616 return true;
29618 offset = GEN_INT (location);
29619 emit_insn (gen (d->target, d->op0, d->op1, offset));
29620 return true;
29623 /* The NEON VTBL instruction is a fully variable permutation that's even
29624 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29625 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29626 can do slightly better by expanding this as a constant where we don't
29627 have to apply a mask. */
29629 static bool
29630 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29632 rtx rperm[MAX_VECT_LEN], sel;
29633 enum machine_mode vmode = d->vmode;
29634 unsigned int i, nelt = d->nelt;
29636 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29637 numbering of elements for big-endian, we must reverse the order. */
29638 if (BYTES_BIG_ENDIAN)
29639 return false;
29641 if (d->testing_p)
29642 return true;
29644 /* Generic code will try constant permutation twice: once with the
29645 original mode and again with the elements lowered to QImode.
29646 So wait and don't do the selector expansion ourselves. */
29647 if (vmode != V8QImode && vmode != V16QImode)
29648 return false;
29650 for (i = 0; i < nelt; ++i)
29651 rperm[i] = GEN_INT (d->perm[i]);
29652 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29653 sel = force_reg (vmode, sel);
29655 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29656 return true;
29659 static bool
29660 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29662 /* Check if the input mask matches vext before reordering the
29663 operands. */
29664 if (TARGET_NEON)
29665 if (arm_evpc_neon_vext (d))
29666 return true;
29668 /* The pattern matching functions above are written to look for a small
29669 number to begin the sequence (0, 1, N/2). If we begin with an index
29670 from the second operand, we can swap the operands. */
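/* For example, on V4SImode the selector { 5, 1, 7, 3 } becomes { 1, 5, 3, 7 }
   once the operands are exchanged, which the VTRN matcher below can then
   recognize. */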
29671 if (d->perm[0] >= d->nelt)
29673 unsigned i, nelt = d->nelt;
29674 rtx x;
29676 for (i = 0; i < nelt; ++i)
29677 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29679 x = d->op0;
29680 d->op0 = d->op1;
29681 d->op1 = x;
29684 if (TARGET_NEON)
29686 if (arm_evpc_neon_vuzp (d))
29687 return true;
29688 if (arm_evpc_neon_vzip (d))
29689 return true;
29690 if (arm_evpc_neon_vrev (d))
29691 return true;
29692 if (arm_evpc_neon_vtrn (d))
29693 return true;
29694 return arm_evpc_neon_vtbl (d);
29696 return false;
29699 /* Expand a vec_perm_const pattern. */
29701 bool
29702 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29704 struct expand_vec_perm_d d;
29705 int i, nelt, which;
29707 d.target = target;
29708 d.op0 = op0;
29709 d.op1 = op1;
29711 d.vmode = GET_MODE (target);
29712 gcc_assert (VECTOR_MODE_P (d.vmode));
29713 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29714 d.testing_p = false;
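/* Record in WHICH whether the selector uses elements of the first operand
   (bit 0), the second operand (bit 1), or both. */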
29716 for (i = which = 0; i < nelt; ++i)
29718 rtx e = XVECEXP (sel, 0, i);
29719 int ei = INTVAL (e) & (2 * nelt - 1);
29720 which |= (ei < nelt ? 1 : 2);
29721 d.perm[i] = ei;
29724 switch (which)
29726 default:
29727 gcc_unreachable();
29729 case 3:
29730 d.one_vector_p = false;
29731 if (!rtx_equal_p (op0, op1))
29732 break;
29734 /* The elements of PERM do not suggest that only the first operand
29735 is used, but both operands are identical. Allow easier matching
29736 of the permutation by folding the permutation into the single
29737 input vector. */
29738 /* FALLTHRU */
29739 case 2:
29740 for (i = 0; i < nelt; ++i)
29741 d.perm[i] &= nelt - 1;
29742 d.op0 = op1;
29743 d.one_vector_p = true;
29744 break;
29746 case 1:
29747 d.op1 = op0;
29748 d.one_vector_p = true;
29749 break;
29752 return arm_expand_vec_perm_const_1 (&d);
29755 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29757 static bool
29758 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
29759 const unsigned char *sel)
29761 struct expand_vec_perm_d d;
29762 unsigned int i, nelt, which;
29763 bool ret;
29765 d.vmode = vmode;
29766 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29767 d.testing_p = true;
29768 memcpy (d.perm, sel, nelt);
29770 /* Categorize the set of elements in the selector. */
29771 for (i = which = 0; i < nelt; ++i)
29773 unsigned char e = d.perm[i];
29774 gcc_assert (e < 2 * nelt);
29775 which |= (e < nelt ? 1 : 2);
29778 /* If all elements come from the second vector, fold them onto the first. */
29779 if (which == 2)
29780 for (i = 0; i < nelt; ++i)
29781 d.perm[i] -= nelt;
29783 /* Check whether the mask can be applied to the vector type. */
29784 d.one_vector_p = (which != 3);
29786 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29787 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29788 if (!d.one_vector_p)
29789 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29791 start_sequence ();
29792 ret = arm_expand_vec_perm_const_1 (&d);
29793 end_sequence ();
29795 return ret;
29798 bool
29799 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
29801 /* On soft-float targets, all auto-increment forms are ok when ldrd is
29802 available or the mode fits in a single word. */
29803 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29804 return true;
29806 switch (code)
29808 /* Post-increment and pre-decrement are supported for all
29809 instruction forms except for vector forms. */
29810 case ARM_POST_INC:
29811 case ARM_PRE_DEC:
29812 if (VECTOR_MODE_P (mode))
29814 if (code != ARM_PRE_DEC)
29815 return true;
29816 else
29817 return false;
29820 return true;
29822 case ARM_POST_DEC:
29823 case ARM_PRE_INC:
29824 /* Without LDRD, and with a mode size greater than
29825 the word size, there is no point in auto-incrementing
29826 because ldm and stm will not have these forms. */
29827 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29828 return false;
29830 /* Vector and floating point modes do not support
29831 these auto increment forms. */
29832 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29833 return false;
29835 return true;
29837 default:
29838 return false;
29842 return false;
29845 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29846 on ARM, since we know that shifts by negative amounts are no-ops.
29847 Additionally, the default expansion code is not available or suitable
29848 for post-reload insn splits (this can occur when the register allocator
29849 chooses not to do a shift in NEON).
29851 This function is used in both initial expand and post-reload splits, and
29852 handles all kinds of 64-bit shifts.
29854 Input requirements:
29855 - It is safe for the input and output to be the same register, but
29856 early-clobber rules apply for the shift amount and scratch registers.
29857 - Shift by register requires both scratch registers. In all other cases
29858 the scratch registers may be NULL.
29859 - Ashiftrt by a register also clobbers the CC register. */
29860 void
29861 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29862 rtx amount, rtx scratch1, rtx scratch2)
29864 rtx out_high = gen_highpart (SImode, out);
29865 rtx out_low = gen_lowpart (SImode, out);
29866 rtx in_high = gen_highpart (SImode, in);
29867 rtx in_low = gen_lowpart (SImode, in);
29869 /* Terminology:
29870 in = the register pair containing the input value.
29871 out = the destination register pair.
29872 up = the high- or low-part of each pair.
29873 down = the opposite part to "up".
29874 In a shift, we can consider bits to shift from "up"-stream to
29875 "down"-stream, so in a left-shift "up" is the low-part and "down"
29876 is the high-part of each register pair. */
29878 rtx out_up = code == ASHIFT ? out_low : out_high;
29879 rtx out_down = code == ASHIFT ? out_high : out_low;
29880 rtx in_up = code == ASHIFT ? in_low : in_high;
29881 rtx in_down = code == ASHIFT ? in_high : in_low;
29883 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29884 gcc_assert (out
29885 && (REG_P (out) || GET_CODE (out) == SUBREG)
29886 && GET_MODE (out) == DImode);
29887 gcc_assert (in
29888 && (REG_P (in) || GET_CODE (in) == SUBREG)
29889 && GET_MODE (in) == DImode);
29890 gcc_assert (amount
29891 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29892 && GET_MODE (amount) == SImode)
29893 || CONST_INT_P (amount)));
29894 gcc_assert (scratch1 == NULL
29895 || (GET_CODE (scratch1) == SCRATCH)
29896 || (GET_MODE (scratch1) == SImode
29897 && REG_P (scratch1)));
29898 gcc_assert (scratch2 == NULL
29899 || (GET_CODE (scratch2) == SCRATCH)
29900 || (GET_MODE (scratch2) == SImode
29901 && REG_P (scratch2)));
29902 gcc_assert (!REG_P (out) || !REG_P (amount)
29903 || !HARD_REGISTER_P (out)
29904 || (REGNO (out) != REGNO (amount)
29905 && REGNO (out) + 1 != REGNO (amount)));
29907 /* Macros to make following code more readable. */
29908 #define SUB_32(DEST,SRC) \
29909 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29910 #define RSB_32(DEST,SRC) \
29911 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29912 #define SUB_S_32(DEST,SRC) \
29913 gen_addsi3_compare0 ((DEST), (SRC), \
29914 GEN_INT (-32))
29915 #define SET(DEST,SRC) \
29916 gen_rtx_SET (SImode, (DEST), (SRC))
29917 #define SHIFT(CODE,SRC,AMOUNT) \
29918 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29919 #define LSHIFT(CODE,SRC,AMOUNT) \
29920 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29921 SImode, (SRC), (AMOUNT))
29922 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29923 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29924 SImode, (SRC), (AMOUNT))
29925 #define ORR(A,B) \
29926 gen_rtx_IOR (SImode, (A), (B))
29927 #define BRANCH(COND,LABEL) \
29928 gen_arm_cond_branch ((LABEL), \
29929 gen_rtx_ ## COND (CCmode, cc_reg, \
29930 const0_rtx), \
29931 cc_reg)
29933 /* Shifts by register and shifts by constant are handled separately. */
29934 if (CONST_INT_P (amount))
29936 /* We have a shift-by-constant. */
29938 /* First, handle out-of-range shift amounts.
29939 In both cases we try to match the result that an ARM instruction in a
29940 shift-by-register would give. This helps reduce execution
29941 differences between optimization levels, but it won't stop other
29942 parts of the compiler doing different things. This is "undefined
29943 behaviour", in any case. */
29944 if (INTVAL (amount) <= 0)
29945 emit_insn (gen_movdi (out, in));
29946 else if (INTVAL (amount) >= 64)
29948 if (code == ASHIFTRT)
29950 rtx const31_rtx = GEN_INT (31);
29951 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29952 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29954 else
29955 emit_insn (gen_movdi (out, const0_rtx));
29958 /* Now handle valid shifts. */
29959 else if (INTVAL (amount) < 32)
29961 /* Shifts by a constant less than 32. */
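/* For example, a logical right shift of a DImode value by 12 gives:
   out_low = (in_low >> 12) | (in_high << 20);
   out_high = in_high >> 12; */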
29962 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29964 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29965 emit_insn (SET (out_down,
29966 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29967 out_down)));
29968 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29970 else
29972 /* Shifts by a constant greater than 31. */
29973 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29975 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29976 if (code == ASHIFTRT)
29977 emit_insn (gen_ashrsi3 (out_up, in_up,
29978 GEN_INT (31)));
29979 else
29980 emit_insn (SET (out_up, const0_rtx));
29983 else
29985 /* We have a shift-by-register. */
29986 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29988 /* This alternative requires the scratch registers. */
29989 gcc_assert (scratch1 && REG_P (scratch1));
29990 gcc_assert (scratch2 && REG_P (scratch2));
29992 /* We will need the values "amount-32" and "32-amount" later.
29993 Swapping them around now allows the later code to be more general. */
29994 switch (code)
29996 case ASHIFT:
29997 emit_insn (SUB_32 (scratch1, amount));
29998 emit_insn (RSB_32 (scratch2, amount));
29999 break;
30000 case ASHIFTRT:
30001 emit_insn (RSB_32 (scratch1, amount));
30002 /* Also set CC = amount > 32. */
30003 emit_insn (SUB_S_32 (scratch2, amount));
30004 break;
30005 case LSHIFTRT:
30006 emit_insn (RSB_32 (scratch1, amount));
30007 emit_insn (SUB_32 (scratch2, amount));
30008 break;
30009 default:
30010 gcc_unreachable ();
30013 /* Emit code like this:
30015 arithmetic-left:
30016 out_down = in_down << amount;
30017 out_down = (in_up << (amount - 32)) | out_down;
30018 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30019 out_up = in_up << amount;
30021 arithmetic-right:
30022 out_down = in_down >> amount;
30023 out_down = (in_up << (32 - amount)) | out_down;
30024 if (amount >= 32)
30025 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30026 out_up = in_up >> amount;
30028 logical-right:
30029 out_down = in_down >> amount;
30030 out_down = (in_up << (32 - amount)) | out_down;
30031 if (amount >= 32)
30032 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30033 out_up = in_up >> amount;
30035 The ARM and Thumb2 variants are the same but implemented slightly
30036 differently. If this were only called during expand we could just
30037 use the Thumb2 case and let combine do the right thing, but this
30038 can also be called from post-reload splitters. */
30040 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30042 if (!TARGET_THUMB2)
30044 /* Emit code for ARM mode. */
30045 emit_insn (SET (out_down,
30046 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30047 if (code == ASHIFTRT)
30049 rtx done_label = gen_label_rtx ();
30050 emit_jump_insn (BRANCH (LT, done_label));
30051 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30052 out_down)));
30053 emit_label (done_label);
30055 else
30056 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30057 out_down)));
30059 else
30061 /* Emit code for Thumb2 mode.
30062 Thumb2 can't do shift and or in one insn. */
30063 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30064 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30066 if (code == ASHIFTRT)
30068 rtx done_label = gen_label_rtx ();
30069 emit_jump_insn (BRANCH (LT, done_label));
30070 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30071 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30072 emit_label (done_label);
30074 else
30076 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30077 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30081 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30084 #undef SUB_32
30085 #undef RSB_32
30086 #undef SUB_S_32
30087 #undef SET
30088 #undef SHIFT
30089 #undef LSHIFT
30090 #undef REV_LSHIFT
30091 #undef ORR
30092 #undef BRANCH
30096 /* Return true if *COMPARISON is a comparison that can be handled, putting
30097 its operands into a valid form; return false otherwise. */
30098 bool
30099 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30101 enum rtx_code code = GET_CODE (*comparison);
30102 int code_int;
30103 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30104 ? GET_MODE (*op2) : GET_MODE (*op1);
30106 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30108 if (code == UNEQ || code == LTGT)
30109 return false;
30111 code_int = (int)code;
30112 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30113 PUT_CODE (*comparison, (enum rtx_code)code_int);
30115 switch (mode)
30117 case SImode:
30118 if (!arm_add_operand (*op1, mode))
30119 *op1 = force_reg (mode, *op1);
30120 if (!arm_add_operand (*op2, mode))
30121 *op2 = force_reg (mode, *op2);
30122 return true;
30124 case DImode:
30125 if (!cmpdi_operand (*op1, mode))
30126 *op1 = force_reg (mode, *op1);
30127 if (!cmpdi_operand (*op2, mode))
30128 *op2 = force_reg (mode, *op2);
30129 return true;
30131 case SFmode:
30132 case DFmode:
30133 if (!arm_float_compare_operand (*op1, mode))
30134 *op1 = force_reg (mode, *op1);
30135 if (!arm_float_compare_operand (*op2, mode))
30136 *op2 = force_reg (mode, *op2);
30137 return true;
30138 default:
30139 break;
30142 return false;
30146 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
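/* AddressSanitizer maps an address to its shadow byte roughly as
   shadow = (addr >> 3) + offset; this hook supplies the constant offset,
   which is 0x20000000 (1 << 29) here. */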
30148 static unsigned HOST_WIDE_INT
30149 arm_asan_shadow_offset (void)
30151 return (unsigned HOST_WIDE_INT) 1 << 29;
30154 #include "gt-arm.h"