[ARM] Initialise T16-related fields in Cortex-A8 tuning struct.
[official-gcc.git] / gcc / config / arm / arm.c
blob de457a36647c83d7752602798df69baca53e865d
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
67 void (*arm_lang_output_object_attributes_hook)(void);
69 struct four_ints
71 int i[4];
74 /* Forward function declarations. */
75 static bool arm_const_not_ok_for_debug_p (rtx);
76 static bool arm_lra_p (void);
77 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
78 static int arm_compute_static_chain_stack_bytes (void);
79 static arm_stack_offsets *arm_get_frame_offsets (void);
80 static void arm_add_gc_roots (void);
81 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
82 HOST_WIDE_INT, rtx, rtx, int, int);
83 static unsigned bit_count (unsigned long);
84 static int arm_address_register_rtx_p (rtx, int);
85 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
86 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
87 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
88 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
89 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
90 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
91 inline static int thumb1_index_register_rtx_p (rtx, int);
92 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
93 static int thumb_far_jump_used_p (void);
94 static bool thumb_force_lr_save (void);
95 static unsigned arm_size_return_regs (void);
96 static bool arm_assemble_integer (rtx, unsigned int, int);
97 static void arm_print_operand (FILE *, rtx, int);
98 static void arm_print_operand_address (FILE *, rtx);
99 static bool arm_print_operand_punct_valid_p (unsigned char code);
100 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
101 static arm_cc get_arm_condition_code (rtx);
102 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
103 static const char *output_multi_immediate (rtx *, const char *, const char *,
104 int, HOST_WIDE_INT);
105 static const char *shift_op (rtx, HOST_WIDE_INT *);
106 static struct machine_function *arm_init_machine_status (void);
107 static void thumb_exit (FILE *, int);
108 static HOST_WIDE_INT get_jump_table_size (rtx);
109 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_forward_ref (Mfix *);
111 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
112 static Mnode *add_minipool_backward_ref (Mfix *);
113 static void assign_minipool_offsets (Mfix *);
114 static void arm_print_value (FILE *, rtx);
115 static void dump_minipool (rtx);
116 static int arm_barrier_cost (rtx);
117 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
118 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
119 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
120 rtx);
121 static void arm_reorg (void);
122 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
123 static unsigned long arm_compute_save_reg0_reg12_mask (void);
124 static unsigned long arm_compute_save_reg_mask (void);
125 static unsigned long arm_isr_value (tree);
126 static unsigned long arm_compute_func_type (void);
127 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
129 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
130 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
131 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
132 #endif
133 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
134 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
135 static int arm_comp_type_attributes (const_tree, const_tree);
136 static void arm_set_default_type_attributes (tree);
137 static int arm_adjust_cost (rtx, rtx, rtx, int);
138 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
139 static int optimal_immediate_sequence (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence);
142 static int optimal_immediate_sequence_1 (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence,
145 int i);
146 static int arm_get_strip_length (int);
147 static bool arm_function_ok_for_sibcall (tree, tree);
148 static enum machine_mode arm_promote_function_mode (const_tree,
149 enum machine_mode, int *,
150 const_tree, int);
151 static bool arm_return_in_memory (const_tree, const_tree);
152 static rtx arm_function_value (const_tree, const_tree, bool);
153 static rtx arm_libcall_value_1 (enum machine_mode);
154 static rtx arm_libcall_value (enum machine_mode, const_rtx);
155 static bool arm_function_value_regno_p (const unsigned int);
156 static void arm_internal_label (FILE *, const char *, unsigned long);
157 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
158 tree);
159 static bool arm_have_conditional_execution (void);
160 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
161 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
162 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
163 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
164 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
169 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
170 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
171 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx safe_vector_operand (rtx, enum machine_mode);
175 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
176 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
177 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
178 static tree arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond, rtx pattern);
180 static rtx emit_set_insn (rtx, rtx);
181 static rtx emit_multi_reg_push (unsigned long, unsigned long);
182 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
183 tree, bool);
184 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
185 const_tree, bool);
186 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
187 const_tree, bool);
188 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
189 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
190 const_tree);
191 static rtx aapcs_libcall_value (enum machine_mode);
192 static int aapcs_select_return_coproc (const_tree, const_tree);
194 #ifdef OBJECT_FORMAT_ELF
195 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
196 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
197 #endif
198 #ifndef ARM_PE
199 static void arm_encode_section_info (tree, rtx, int);
200 #endif
202 static void arm_file_end (void);
203 static void arm_file_start (void);
205 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
206 tree, int *, int);
207 static bool arm_pass_by_reference (cumulative_args_t,
208 enum machine_mode, const_tree, bool);
209 static bool arm_promote_prototypes (const_tree);
210 static bool arm_default_short_enums (void);
211 static bool arm_align_anon_bitfield (void);
212 static bool arm_return_in_msb (const_tree);
213 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
214 static bool arm_return_in_memory (const_tree, const_tree);
215 #if ARM_UNWIND_INFO
216 static void arm_unwind_emit (FILE *, rtx);
217 static bool arm_output_ttype (rtx);
218 static void arm_asm_emit_except_personality (rtx);
219 static void arm_asm_init_sections (void);
220 #endif
221 static rtx arm_dwarf_register_span (rtx);
223 static tree arm_cxx_guard_type (void);
224 static bool arm_cxx_guard_mask_bit (void);
225 static tree arm_get_cookie_size (tree);
226 static bool arm_cookie_has_size (void);
227 static bool arm_cxx_cdtor_returns_this (void);
228 static bool arm_cxx_key_method_may_be_inline (void);
229 static void arm_cxx_determine_class_data_visibility (tree);
230 static bool arm_cxx_class_data_always_comdat (void);
231 static bool arm_cxx_use_aeabi_atexit (void);
232 static void arm_init_libfuncs (void);
233 static tree arm_build_builtin_va_list (void);
234 static void arm_expand_builtin_va_start (tree, rtx);
235 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
236 static void arm_option_override (void);
237 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
238 static bool arm_cannot_copy_insn_p (rtx);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
 295      affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
 321    multiple times.  */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
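/* Editorial example, not part of arm.c: the table above feeds GCC's generic
   attribute machinery, so user code can be annotated for instance as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     extern void far_away (void) __attribute__ ((long_call));

   "interrupt" is routed to arm_handle_isr_attribute; "long_call" has no
   handler (NULL above) and is simply recorded on the function type.  */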
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
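/* Editorial check of the comment above, not part of arm.c: an anchor can
   reach offsets -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes per
   block, and 8184 = 8 * 1023, so every block is a whole number of
   eight-byte units.  */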
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
679 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
681 struct gcc_target targetm = TARGET_INITIALIZER;
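/* Editorial note, not part of arm.c: each #undef/#define pair above
   replaces a default hook from target-def.h, and TARGET_INITIALIZER then
   expands to an aggregate initializer that collects all TARGET_* values
   into targetm; e.g. targetm.rtx_costs ends up pointing at arm_rtx_costs.  */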
683 /* Obstack for minipool constant handling. */
684 static struct obstack minipool_obstack;
685 static char * minipool_startobj;
687 /* The maximum number of insns skipped which
688 will be conditionalised if possible. */
689 static int max_insns_skipped = 5;
691 extern FILE * asm_out_file;
693 /* True if we are currently building a constant table. */
694 int making_const_table;
696 /* The processor for which instructions should be scheduled. */
697 enum processor_type arm_tune = arm_none;
699 /* The current tuning set. */
700 const struct tune_params *current_tune;
702 /* Which floating point hardware to schedule for. */
703 int arm_fpu_attr;
 705 /* Which floating point hardware to use.  */
706 const struct arm_fpu_desc *arm_fpu_desc;
708 /* Used for Thumb call_via trampolines. */
709 rtx thumb_call_via_label[14];
710 static int thumb_call_reg_needed;
712 /* Bit values used to identify processor capabilities. */
713 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
714 #define FL_ARCH3M (1 << 1) /* Extended multiply */
715 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
716 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
717 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
718 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
719 #define FL_THUMB (1 << 6) /* Thumb aware */
720 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
721 #define FL_STRONG (1 << 8) /* StrongARM */
722 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
723 #define FL_XSCALE (1 << 10) /* XScale */
724 /* spare (1 << 11) */
725 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
726 media instructions. */
727 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
728 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
729 Note: ARM6 & 7 derivatives only. */
730 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
731 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
732 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
733 profile. */
734 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
735 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
736 #define FL_NEON (1 << 20) /* Neon instructions. */
737 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
738 architecture. */
739 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
740 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
741 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
742 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
744 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
745 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
 747 /* Flags that only affect tuning, not available instructions.  */
748 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
749 | FL_CO_PROC)
751 #define FL_FOR_ARCH2 FL_NOTM
752 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
753 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
754 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
755 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
756 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
757 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
758 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
759 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
760 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
761 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
762 #define FL_FOR_ARCH6J FL_FOR_ARCH6
763 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
764 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
765 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
766 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
767 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
768 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
769 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
770 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
771 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
772 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
773 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
774 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
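/* Editorial sketch, not part of arm.c: arm_option_override later derives
   the arm_arch* globals by masking insn_flags with these bits, roughly

     arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
     arm_arch_notm   = (insn_flags & FL_NOTM) != 0;

   e.g. FL_FOR_ARCH7A carries FL_THUMB2 (via FL_FOR_ARCH6T2) and re-adds
   FL_NOTM, whereas FL_FOR_ARCH6M (= FL_FOR_ARCH6 & ~FL_NOTM) clears it,
   which is how the M-profile restriction is expressed.  */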
776 /* The bits in this mask specify which
777 instructions we are allowed to generate. */
778 static unsigned long insn_flags = 0;
780 /* The bits in this mask specify which instruction scheduling options should
781 be used. */
782 static unsigned long tune_flags = 0;
784 /* The highest ARM architecture version supported by the
785 target. */
786 enum base_architecture arm_base_arch = BASE_ARCH_0;
788 /* The following are used in the arm.md file as equivalents to bits
789 in the above two flag variables. */
791 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
792 int arm_arch3m = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
795 int arm_arch4 = 0;
797 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
798 int arm_arch4t = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
801 int arm_arch5 = 0;
803 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
804 int arm_arch5e = 0;
806 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
807 int arm_arch6 = 0;
809 /* Nonzero if this chip supports the ARM 6K extensions. */
810 int arm_arch6k = 0;
812 /* Nonzero if instructions present in ARMv6-M can be used. */
813 int arm_arch6m = 0;
815 /* Nonzero if this chip supports the ARM 7 extensions. */
816 int arm_arch7 = 0;
818 /* Nonzero if instructions not present in the 'M' profile can be used. */
819 int arm_arch_notm = 0;
821 /* Nonzero if instructions present in ARMv7E-M can be used. */
822 int arm_arch7em = 0;
824 /* Nonzero if instructions present in ARMv8 can be used. */
825 int arm_arch8 = 0;
827 /* Nonzero if this chip can benefit from load scheduling. */
828 int arm_ld_sched = 0;
830 /* Nonzero if this chip is a StrongARM. */
831 int arm_tune_strongarm = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX technology. */
834 int arm_arch_iwmmxt = 0;
836 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
837 int arm_arch_iwmmxt2 = 0;
839 /* Nonzero if this chip is an XScale. */
840 int arm_arch_xscale = 0;
 842 /* Nonzero if tuning for XScale.  */
843 int arm_tune_xscale = 0;
845 /* Nonzero if we want to tune for stores that access the write-buffer.
846 This typically means an ARM6 or ARM7 with MMU or MPU. */
847 int arm_tune_wbuf = 0;
849 /* Nonzero if tuning for Cortex-A9. */
850 int arm_tune_cortex_a9 = 0;
852 /* Nonzero if generating Thumb instructions. */
853 int thumb_code = 0;
855 /* Nonzero if generating Thumb-1 instructions. */
856 int thumb1_code = 0;
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
859 preprocessor.
860 XXX This is a bit of a hack, it's intended to help work around
861 problems in GLD which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork = 0;
865 /* Nonzero if chip supports Thumb 2. */
866 int arm_arch_thumb2;
868 /* Nonzero if chip supports integer division instruction. */
869 int arm_arch_arm_hwdiv;
870 int arm_arch_thumb_hwdiv;
 872 /* Nonzero if we should use Neon rather than core registers to handle
 873    64-bit operations.  */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 enum machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* The condition codes of the ARM, and the inverse function. */
908 static const char * const arm_condition_codes[] =
910 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
911 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
915 int arm_regs_in_sequence[] =
917 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
929 struct processors
931 const char *const name;
932 enum processor_type core;
933 const char *arch;
934 enum base_architecture base_arch;
935 const unsigned long flags;
936 const struct tune_params *const tune;
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
942 prefetch_slots, \
943 l1_size, \
944 l1_line_size
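/* Editorial example, not part of arm.c: these macros simply splice three
   tune_params fields.  With made-up figures,

     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)

   expands to "4, 32768, 64" (prefetch slots, L1 size, L1 line size),
   while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */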
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
 950   1,  /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs =
970 /* ALU */
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* rev. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
990 /* MULT SImode */
992 COSTS_N_INSNS (3), /* simple. */
993 COSTS_N_INSNS (3), /* flag_setting. */
994 COSTS_N_INSNS (2), /* extend. */
995 COSTS_N_INSNS (3), /* add. */
996 COSTS_N_INSNS (2), /* extend_add. */
997 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
999 /* MULT DImode */
1001 0, /* simple (N/A). */
1002 0, /* flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* extend. */
1004 0, /* add (N/A). */
1005 COSTS_N_INSNS (4), /* extend_add. */
1006 0 /* idiv (N/A). */
1009 /* LD/ST */
1011 COSTS_N_INSNS (2), /* load. */
1012 COSTS_N_INSNS (2), /* load_sign_extend. */
1013 COSTS_N_INSNS (2), /* ldrd. */
1014 COSTS_N_INSNS (2), /* ldm_1st. */
1015 1, /* ldm_regs_per_insn_1st. */
1016 2, /* ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* loadf. */
1018 COSTS_N_INSNS (5), /* loadd. */
1019 COSTS_N_INSNS (1), /* load_unaligned. */
1020 COSTS_N_INSNS (2), /* store. */
1021 COSTS_N_INSNS (2), /* strd. */
1022 COSTS_N_INSNS (2), /* stm_1st. */
1023 1, /* stm_regs_per_insn_1st. */
1024 2, /* stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* storef. */
1026 COSTS_N_INSNS (1), /* stored. */
1027 COSTS_N_INSNS (1) /* store_unaligned. */
1030 /* FP SFmode */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1046 /* FP DFmode */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1063 /* Vector */
1065 COSTS_N_INSNS (1) /* alu. */
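/* Editorial note, not part of arm.c: entries in these per-core tables are
   extra costs layered on top of a baseline insn cost and are expressed
   with COSTS_N_INSNS from rtl.h, where COSTS_N_INSNS (N) is N * 4.  So the
   Cortex-A9 SImode "simple" multiply above is costed three instructions
   dearer than a plain arithmetic op, whose extra cost is 0.  */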
1069 const struct cpu_cost_table cortexa8_extra_costs =
1071 /* ALU */
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 0, /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1081 0, /* extend. */
1082 0, /* extend_arith. */
1083 0, /* bfi. */
1084 0, /* bfx. */
1085 0, /* clz. */
1086 0, /* rev. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1091 /* MULT SImode */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1100 /* MULT DImode */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1105 0, /* add (N/A). */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1110 /* LD/ST */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1131 /* FP SFmode */
1133 COSTS_N_INSNS (36), /* div. */
1134 COSTS_N_INSNS (11), /* mult. */
1135 COSTS_N_INSNS (20), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (9), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (6), /* compare. */
1141 COSTS_N_INSNS (4), /* widen. */
1142 COSTS_N_INSNS (4), /* narrow. */
1143 COSTS_N_INSNS (8), /* toint. */
1144 COSTS_N_INSNS (8), /* fromint. */
1145 COSTS_N_INSNS (8) /* roundint. */
1147 /* FP DFmode */
1149 COSTS_N_INSNS (64), /* div. */
1150 COSTS_N_INSNS (16), /* mult. */
1151 COSTS_N_INSNS (25), /* mult_addsub. */
1152 COSTS_N_INSNS (30), /* fma. */
1153 COSTS_N_INSNS (9), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (6), /* compare. */
1157 COSTS_N_INSNS (6), /* widen. */
1158 COSTS_N_INSNS (6), /* narrow. */
1159 COSTS_N_INSNS (8), /* toint. */
1160 COSTS_N_INSNS (8), /* fromint. */
1161 COSTS_N_INSNS (8) /* roundint. */
1164 /* Vector */
1166 COSTS_N_INSNS (1) /* alu. */
1172 const struct cpu_cost_table cortexa7_extra_costs =
1174 /* ALU */
1176 0, /* arith. */
1177 0, /* logical. */
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1190 0, /* non_exec. */
1191 true /* non_exec_costs_exec. */
1195 /* MULT SImode */
1197 0, /* simple. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1204 /* MULT DImode */
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1209 0, /* add. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1211 0 /* idiv (N/A). */
1214 /* LD/ST */
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (3), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (2), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1) /* store_unaligned. */
1235 /* FP SFmode */
1237 COSTS_N_INSNS (15), /* div. */
1238 COSTS_N_INSNS (3), /* mult. */
1239 COSTS_N_INSNS (7), /* mult_addsub. */
1240 COSTS_N_INSNS (7), /* fma. */
1241 COSTS_N_INSNS (3), /* addsub. */
1242 COSTS_N_INSNS (3), /* fpconst. */
1243 COSTS_N_INSNS (3), /* neg. */
1244 COSTS_N_INSNS (3), /* compare. */
1245 COSTS_N_INSNS (3), /* widen. */
1246 COSTS_N_INSNS (3), /* narrow. */
1247 COSTS_N_INSNS (3), /* toint. */
1248 COSTS_N_INSNS (3), /* fromint. */
1249 COSTS_N_INSNS (3) /* roundint. */
1251 /* FP DFmode */
1253 COSTS_N_INSNS (30), /* div. */
1254 COSTS_N_INSNS (6), /* mult. */
1255 COSTS_N_INSNS (10), /* mult_addsub. */
1256 COSTS_N_INSNS (7), /* fma. */
1257 COSTS_N_INSNS (3), /* addsub. */
1258 COSTS_N_INSNS (3), /* fpconst. */
1259 COSTS_N_INSNS (3), /* neg. */
1260 COSTS_N_INSNS (3), /* compare. */
1261 COSTS_N_INSNS (3), /* widen. */
1262 COSTS_N_INSNS (3), /* narrow. */
1263 COSTS_N_INSNS (3), /* toint. */
1264 COSTS_N_INSNS (3), /* fromint. */
1265 COSTS_N_INSNS (3) /* roundint. */
1268 /* Vector */
1270 COSTS_N_INSNS (1) /* alu. */
1274 const struct cpu_cost_table cortexa12_extra_costs =
1276 /* ALU */
1278 0, /* arith. */
1279 0, /* logical. */
1280 0, /* shift. */
1281 COSTS_N_INSNS (1), /* shift_reg. */
1282 COSTS_N_INSNS (1), /* arith_shift. */
1283 COSTS_N_INSNS (1), /* arith_shift_reg. */
1284 COSTS_N_INSNS (1), /* log_shift. */
1285 COSTS_N_INSNS (1), /* log_shift_reg. */
1286 0, /* extend. */
1287 COSTS_N_INSNS (1), /* extend_arith. */
1288 0, /* bfi. */
1289 COSTS_N_INSNS (1), /* bfx. */
1290 COSTS_N_INSNS (1), /* clz. */
1291 COSTS_N_INSNS (1), /* rev. */
1292 0, /* non_exec. */
1293 true /* non_exec_costs_exec. */
1295 /* MULT SImode */
1298 COSTS_N_INSNS (2), /* simple. */
1299 COSTS_N_INSNS (3), /* flag_setting. */
1300 COSTS_N_INSNS (2), /* extend. */
1301 COSTS_N_INSNS (3), /* add. */
1302 COSTS_N_INSNS (2), /* extend_add. */
1303 COSTS_N_INSNS (18) /* idiv. */
1305 /* MULT DImode */
1307 0, /* simple (N/A). */
1308 0, /* flag_setting (N/A). */
1309 COSTS_N_INSNS (3), /* extend. */
1310 0, /* add (N/A). */
1311 COSTS_N_INSNS (3), /* extend_add. */
1312 0 /* idiv (N/A). */
1315 /* LD/ST */
1317 COSTS_N_INSNS (3), /* load. */
1318 COSTS_N_INSNS (3), /* load_sign_extend. */
1319 COSTS_N_INSNS (3), /* ldrd. */
1320 COSTS_N_INSNS (3), /* ldm_1st. */
1321 1, /* ldm_regs_per_insn_1st. */
1322 2, /* ldm_regs_per_insn_subsequent. */
1323 COSTS_N_INSNS (3), /* loadf. */
1324 COSTS_N_INSNS (3), /* loadd. */
1325 0, /* load_unaligned. */
1326 0, /* store. */
1327 0, /* strd. */
1328 0, /* stm_1st. */
1329 1, /* stm_regs_per_insn_1st. */
1330 2, /* stm_regs_per_insn_subsequent. */
1331 COSTS_N_INSNS (2), /* storef. */
1332 COSTS_N_INSNS (2), /* stored. */
1333 0 /* store_unaligned. */
1336 /* FP SFmode */
1338 COSTS_N_INSNS (17), /* div. */
1339 COSTS_N_INSNS (4), /* mult. */
1340 COSTS_N_INSNS (8), /* mult_addsub. */
1341 COSTS_N_INSNS (8), /* fma. */
1342 COSTS_N_INSNS (4), /* addsub. */
1343 COSTS_N_INSNS (2), /* fpconst. */
1344 COSTS_N_INSNS (2), /* neg. */
1345 COSTS_N_INSNS (2), /* compare. */
1346 COSTS_N_INSNS (4), /* widen. */
1347 COSTS_N_INSNS (4), /* narrow. */
1348 COSTS_N_INSNS (4), /* toint. */
1349 COSTS_N_INSNS (4), /* fromint. */
1350 COSTS_N_INSNS (4) /* roundint. */
1352 /* FP DFmode */
1354 COSTS_N_INSNS (31), /* div. */
1355 COSTS_N_INSNS (4), /* mult. */
1356 COSTS_N_INSNS (8), /* mult_addsub. */
1357 COSTS_N_INSNS (8), /* fma. */
1358 COSTS_N_INSNS (4), /* addsub. */
1359 COSTS_N_INSNS (2), /* fpconst. */
1360 COSTS_N_INSNS (2), /* neg. */
1361 COSTS_N_INSNS (2), /* compare. */
1362 COSTS_N_INSNS (4), /* widen. */
1363 COSTS_N_INSNS (4), /* narrow. */
1364 COSTS_N_INSNS (4), /* toint. */
1365 COSTS_N_INSNS (4), /* fromint. */
1366 COSTS_N_INSNS (4) /* roundint. */
1369 /* Vector */
1371 COSTS_N_INSNS (1) /* alu. */
1375 const struct cpu_cost_table cortexa15_extra_costs =
1377 /* ALU */
1379 0, /* arith. */
1380 0, /* logical. */
1381 0, /* shift. */
1382 0, /* shift_reg. */
1383 COSTS_N_INSNS (1), /* arith_shift. */
1384 COSTS_N_INSNS (1), /* arith_shift_reg. */
1385 COSTS_N_INSNS (1), /* log_shift. */
1386 COSTS_N_INSNS (1), /* log_shift_reg. */
1387 0, /* extend. */
1388 COSTS_N_INSNS (1), /* extend_arith. */
1389 COSTS_N_INSNS (1), /* bfi. */
1390 0, /* bfx. */
1391 0, /* clz. */
1392 0, /* rev. */
1393 0, /* non_exec. */
1394 true /* non_exec_costs_exec. */
1396 /* MULT SImode */
1399 COSTS_N_INSNS (2), /* simple. */
1400 COSTS_N_INSNS (3), /* flag_setting. */
1401 COSTS_N_INSNS (2), /* extend. */
1402 COSTS_N_INSNS (2), /* add. */
1403 COSTS_N_INSNS (2), /* extend_add. */
1404 COSTS_N_INSNS (18) /* idiv. */
1406 /* MULT DImode */
1408 0, /* simple (N/A). */
1409 0, /* flag_setting (N/A). */
1410 COSTS_N_INSNS (3), /* extend. */
1411 0, /* add (N/A). */
1412 COSTS_N_INSNS (3), /* extend_add. */
1413 0 /* idiv (N/A). */
1416 /* LD/ST */
1418 COSTS_N_INSNS (3), /* load. */
1419 COSTS_N_INSNS (3), /* load_sign_extend. */
1420 COSTS_N_INSNS (3), /* ldrd. */
1421 COSTS_N_INSNS (4), /* ldm_1st. */
1422 1, /* ldm_regs_per_insn_1st. */
1423 2, /* ldm_regs_per_insn_subsequent. */
1424 COSTS_N_INSNS (4), /* loadf. */
1425 COSTS_N_INSNS (4), /* loadd. */
1426 0, /* load_unaligned. */
1427 0, /* store. */
1428 0, /* strd. */
1429 COSTS_N_INSNS (1), /* stm_1st. */
1430 1, /* stm_regs_per_insn_1st. */
1431 2, /* stm_regs_per_insn_subsequent. */
1432 0, /* storef. */
1433 0, /* stored. */
1434 0 /* store_unaligned. */
1437 /* FP SFmode */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (5), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1453 /* FP DFmode */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1470 /* Vector */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table v7m_extra_costs =
1478 /* ALU */
1480 0, /* arith. */
1481 0, /* logical. */
1482 0, /* shift. */
1483 0, /* shift_reg. */
1484 0, /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 0, /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1488 0, /* extend. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 0, /* bfi. */
1491 0, /* bfx. */
1492 0, /* clz. */
1493 0, /* rev. */
1494 COSTS_N_INSNS (1), /* non_exec. */
1495 false /* non_exec_costs_exec. */
1498 /* MULT SImode */
1500 COSTS_N_INSNS (1), /* simple. */
1501 COSTS_N_INSNS (1), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (1), /* add. */
1504 COSTS_N_INSNS (3), /* extend_add. */
1505 COSTS_N_INSNS (8) /* idiv. */
1507 /* MULT DImode */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (2), /* extend. */
1512 0, /* add (N/A). */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 0 /* idiv (N/A). */
1517 /* LD/ST */
1519 COSTS_N_INSNS (2), /* load. */
1520 0, /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (2), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 1, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (2), /* loadf. */
1526 COSTS_N_INSNS (3), /* loadd. */
1527 COSTS_N_INSNS (1), /* load_unaligned. */
1528 COSTS_N_INSNS (2), /* store. */
1529 COSTS_N_INSNS (3), /* strd. */
1530 COSTS_N_INSNS (2), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 1, /* stm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (2), /* storef. */
1534 COSTS_N_INSNS (3), /* stored. */
1535 COSTS_N_INSNS (1) /* store_unaligned. */
1538 /* FP SFmode */
1540 COSTS_N_INSNS (7), /* div. */
1541 COSTS_N_INSNS (2), /* mult. */
1542 COSTS_N_INSNS (5), /* mult_addsub. */
1543 COSTS_N_INSNS (3), /* fma. */
1544 COSTS_N_INSNS (1), /* addsub. */
1545 0, /* fpconst. */
1546 0, /* neg. */
1547 0, /* compare. */
1548 0, /* widen. */
1549 0, /* narrow. */
1550 0, /* toint. */
1551 0, /* fromint. */
1552 0 /* roundint. */
1554 /* FP DFmode */
1556 COSTS_N_INSNS (15), /* div. */
1557 COSTS_N_INSNS (5), /* mult. */
1558 COSTS_N_INSNS (7), /* mult_addsub. */
1559 COSTS_N_INSNS (7), /* fma. */
1560 COSTS_N_INSNS (3), /* addsub. */
1561 0, /* fpconst. */
1562 0, /* neg. */
1563 0, /* compare. */
1564 0, /* widen. */
1565 0, /* narrow. */
1566 0, /* toint. */
1567 0, /* fromint. */
1568 0 /* roundint. */
1571 /* Vector */
1573 COSTS_N_INSNS (1) /* alu. */
1577 const struct tune_params arm_slowmul_tune =
1579 arm_slowmul_rtx_costs,
1580 NULL,
1581 NULL, /* Sched adj cost. */
1582 3, /* Constant limit. */
1583 5, /* Max cond insns. */
1584 ARM_PREFETCH_NOT_BENEFICIAL,
1585 true, /* Prefer constant pool. */
1586 arm_default_branch_cost,
1587 false, /* Prefer LDRD/STRD. */
1588 {true, true}, /* Prefer non short circuit. */
1589 &arm_default_vec_cost, /* Vectorizer costs. */
1590 false, /* Prefer Neon for 64-bits bitops. */
1591 false, false /* Prefer 32-bit encodings. */
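/* Editorial note, not part of arm.c: every tune_params initializer below
   fills the same positional fields, as the trailing comments indicate: an
   rtx-costs callback, an optional per-core extra-cost table, an optional
   scheduler cost-adjust hook, the constant limit, the maximum number of
   insns to conditionalise, prefetch parameters, the constant-pool
   preference, a branch-cost hook, and further preference flags ending with
   the vectoriser cost table, the "prefer Neon for 64-bit bitops" flag and
   the "prefer 32-bit encodings" flags.  */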
1594 const struct tune_params arm_fastmul_tune =
1596 arm_fastmul_rtx_costs,
1597 NULL,
1598 NULL, /* Sched adj cost. */
1599 1, /* Constant limit. */
1600 5, /* Max cond insns. */
1601 ARM_PREFETCH_NOT_BENEFICIAL,
1602 true, /* Prefer constant pool. */
1603 arm_default_branch_cost,
1604 false, /* Prefer LDRD/STRD. */
1605 {true, true}, /* Prefer non short circuit. */
1606 &arm_default_vec_cost, /* Vectorizer costs. */
1607 false, /* Prefer Neon for 64-bits bitops. */
1608 false, false /* Prefer 32-bit encodings. */
1611 /* StrongARM has early execution of branches, so a sequence that is worth
1612 skipping is shorter. Set max_insns_skipped to a lower value. */
1614 const struct tune_params arm_strongarm_tune =
1616 arm_fastmul_rtx_costs,
1617 NULL,
1618 NULL, /* Sched adj cost. */
1619 1, /* Constant limit. */
1620 3, /* Max cond insns. */
1621 ARM_PREFETCH_NOT_BENEFICIAL,
1622 true, /* Prefer constant pool. */
1623 arm_default_branch_cost,
1624 false, /* Prefer LDRD/STRD. */
1625 {true, true}, /* Prefer non short circuit. */
1626 &arm_default_vec_cost, /* Vectorizer costs. */
1627 false, /* Prefer Neon for 64-bits bitops. */
1628 false, false /* Prefer 32-bit encodings. */
1631 const struct tune_params arm_xscale_tune =
1633 arm_xscale_rtx_costs,
1634 NULL,
1635 xscale_sched_adjust_cost,
1636 2, /* Constant limit. */
1637 3, /* Max cond insns. */
1638 ARM_PREFETCH_NOT_BENEFICIAL,
1639 true, /* Prefer constant pool. */
1640 arm_default_branch_cost,
1641 false, /* Prefer LDRD/STRD. */
1642 {true, true}, /* Prefer non short circuit. */
1643 &arm_default_vec_cost, /* Vectorizer costs. */
1644 false, /* Prefer Neon for 64-bits bitops. */
1645 false, false /* Prefer 32-bit encodings. */
1648 const struct tune_params arm_9e_tune =
1650 arm_9e_rtx_costs,
1651 NULL,
1652 NULL, /* Sched adj cost. */
1653 1, /* Constant limit. */
1654 5, /* Max cond insns. */
1655 ARM_PREFETCH_NOT_BENEFICIAL,
1656 true, /* Prefer constant pool. */
1657 arm_default_branch_cost,
1658 false, /* Prefer LDRD/STRD. */
1659 {true, true}, /* Prefer non short circuit. */
1660 &arm_default_vec_cost, /* Vectorizer costs. */
1661 false, /* Prefer Neon for 64-bits bitops. */
1662 false, false /* Prefer 32-bit encodings. */
1665 const struct tune_params arm_v6t2_tune =
1667 arm_9e_rtx_costs,
1668 NULL,
1669 NULL, /* Sched adj cost. */
1670 1, /* Constant limit. */
1671 5, /* Max cond insns. */
1672 ARM_PREFETCH_NOT_BENEFICIAL,
1673 false, /* Prefer constant pool. */
1674 arm_default_branch_cost,
1675 false, /* Prefer LDRD/STRD. */
1676 {true, true}, /* Prefer non short circuit. */
1677 &arm_default_vec_cost, /* Vectorizer costs. */
1678 false, /* Prefer Neon for 64-bits bitops. */
1679 false, false /* Prefer 32-bit encodings. */
1682 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1683 const struct tune_params arm_cortex_tune =
1685 arm_9e_rtx_costs,
1686 &generic_extra_costs,
1687 NULL, /* Sched adj cost. */
1688 1, /* Constant limit. */
1689 5, /* Max cond insns. */
1690 ARM_PREFETCH_NOT_BENEFICIAL,
1691 false, /* Prefer constant pool. */
1692 arm_default_branch_cost,
1693 false, /* Prefer LDRD/STRD. */
1694 {true, true}, /* Prefer non short circuit. */
1695 &arm_default_vec_cost, /* Vectorizer costs. */
1696 false, /* Prefer Neon for 64-bits bitops. */
1697 false, false /* Prefer 32-bit encodings. */
1700 const struct tune_params arm_cortex_a8_tune =
1702 arm_9e_rtx_costs,
1703 &cortexa8_extra_costs,
1704 NULL, /* Sched adj cost. */
1705 1, /* Constant limit. */
1706 5, /* Max cond insns. */
1707 ARM_PREFETCH_NOT_BENEFICIAL,
1708 false, /* Prefer constant pool. */
1709 arm_default_branch_cost,
1710 false, /* Prefer LDRD/STRD. */
1711 {true, true}, /* Prefer non short circuit. */
1712 &arm_default_vec_cost, /* Vectorizer costs. */
1713 false, /* Prefer Neon for 64-bits bitops. */
1714 false, false /* Prefer 32-bit encodings. */
1717 const struct tune_params arm_cortex_a7_tune =
1719 arm_9e_rtx_costs,
1720 &cortexa7_extra_costs,
1721 NULL,
1722 1, /* Constant limit. */
1723 5, /* Max cond insns. */
1724 ARM_PREFETCH_NOT_BENEFICIAL,
1725 false, /* Prefer constant pool. */
1726 arm_default_branch_cost,
1727 false, /* Prefer LDRD/STRD. */
1728 {true, true}, /* Prefer non short circuit. */
1729 &arm_default_vec_cost, /* Vectorizer costs. */
1730 false, /* Prefer Neon for 64-bits bitops. */
1731 false, false /* Prefer 32-bit encodings. */
1734 const struct tune_params arm_cortex_a15_tune =
1736 arm_9e_rtx_costs,
1737 &cortexa15_extra_costs,
1738 NULL, /* Sched adj cost. */
1739 1, /* Constant limit. */
1740 2, /* Max cond insns. */
1741 ARM_PREFETCH_NOT_BENEFICIAL,
1742 false, /* Prefer constant pool. */
1743 arm_default_branch_cost,
1744 true, /* Prefer LDRD/STRD. */
1745 {true, true}, /* Prefer non short circuit. */
1746 &arm_default_vec_cost, /* Vectorizer costs. */
1747 false, /* Prefer Neon for 64-bits bitops. */
1748 true, true /* Prefer 32-bit encodings. */
1751 const struct tune_params arm_cortex_a53_tune =
1753 arm_9e_rtx_costs,
1754 &cortexa53_extra_costs,
1755 NULL, /* Scheduler cost adjustment. */
1756 1, /* Constant limit. */
1757 5, /* Max cond insns. */
1758 ARM_PREFETCH_NOT_BENEFICIAL,
1759 false, /* Prefer constant pool. */
1760 arm_default_branch_cost,
1761 false, /* Prefer LDRD/STRD. */
1762 {true, true}, /* Prefer non short circuit. */
1763 &arm_default_vec_cost, /* Vectorizer costs. */
1764 false, /* Prefer Neon for 64-bits bitops. */
1765 false, false /* Prefer 32-bit encodings. */
1768 const struct tune_params arm_cortex_a57_tune =
1770 arm_9e_rtx_costs,
1771 &cortexa57_extra_costs,
1772 NULL, /* Scheduler cost adjustment. */
1773 1, /* Constant limit. */
1774 2, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 false, /* Prefer constant pool. */
1777 arm_default_branch_cost,
1778 true, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 true, true /* Prefer 32-bit encodings. */
1785 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1786 less appealing. Set max_insns_skipped to a low value. */
1788 const struct tune_params arm_cortex_a5_tune =
1790 arm_9e_rtx_costs,
1791 NULL,
1792 NULL, /* Sched adj cost. */
1793 1, /* Constant limit. */
1794 1, /* Max cond insns. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 false, /* Prefer constant pool. */
1797 arm_cortex_a5_branch_cost,
1798 false, /* Prefer LDRD/STRD. */
1799 {false, false}, /* Prefer non short circuit. */
1800 &arm_default_vec_cost, /* Vectorizer costs. */
1801 false, /* Prefer Neon for 64-bits bitops. */
1802 false, false /* Prefer 32-bit encodings. */
1805 const struct tune_params arm_cortex_a9_tune =
1807 arm_9e_rtx_costs,
1808 &cortexa9_extra_costs,
1809 cortex_a9_sched_adjust_cost,
1810 1, /* Constant limit. */
1811 5, /* Max cond insns. */
1812 ARM_PREFETCH_BENEFICIAL(4,32,32),
1813 false, /* Prefer constant pool. */
1814 arm_default_branch_cost,
1815 false, /* Prefer LDRD/STRD. */
1816 {true, true}, /* Prefer non short circuit. */
1817 &arm_default_vec_cost, /* Vectorizer costs. */
1818 false, /* Prefer Neon for 64-bits bitops. */
1819 false, false /* Prefer 32-bit encodings. */
1822 const struct tune_params arm_cortex_a12_tune =
1824 arm_9e_rtx_costs,
1825 &cortexa12_extra_costs,
1826 NULL,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 ARM_PREFETCH_BENEFICIAL(4,32,32),
1830 false, /* Prefer constant pool. */
1831 arm_default_branch_cost,
1832 true, /* Prefer LDRD/STRD. */
1833 {true, true}, /* Prefer non short circuit. */
1834 &arm_default_vec_cost, /* Vectorizer costs. */
1835 false, /* Prefer Neon for 64-bits bitops. */
1836 false, false /* Prefer 32-bit encodings. */
1839 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1840 cycle to execute each. An LDR from the constant pool also takes two cycles
1841 to execute, but mildly increases pipelining opportunity (consecutive
1842 loads/stores can be pipelined together, saving one cycle), and may also
1843 improve icache utilisation. Hence we prefer the constant pool for such
1844 processors. */
1846 const struct tune_params arm_v7m_tune =
1848 arm_9e_rtx_costs,
1849 &v7m_extra_costs,
1850 NULL, /* Sched adj cost. */
1851 1, /* Constant limit. */
1852 2, /* Max cond insns. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 true, /* Prefer constant pool. */
1855 arm_cortex_m_branch_cost,
1856 false, /* Prefer LDRD/STRD. */
1857 {false, false}, /* Prefer non short circuit. */
1858 &arm_default_vec_cost, /* Vectorizer costs. */
1859 false, /* Prefer Neon for 64-bits bitops. */
1860 false, false /* Prefer 32-bit encodings. */
1863 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1864 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1865 const struct tune_params arm_v6m_tune =
1867 arm_9e_rtx_costs,
1868 NULL,
1869 NULL, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 false, /* Prefer LDRD/STRD. */
1876 {false, false}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false /* Prefer 32-bit encodings. */
1882 const struct tune_params arm_fa726te_tune =
1884 arm_9e_rtx_costs,
1885 NULL,
1886 fa726te_sched_adjust_cost,
1887 1, /* Constant limit. */
1888 5, /* Max cond insns. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 true, /* Prefer constant pool. */
1891 arm_default_branch_cost,
1892 false, /* Prefer LDRD/STRD. */
1893 {true, true}, /* Prefer non short circuit. */
1894 &arm_default_vec_cost, /* Vectorizer costs. */
1895 false, /* Prefer Neon for 64-bits bitops. */
1896 false, false /* Prefer 32-bit encodings. */
1900 /* Not all of these give usefully different compilation alternatives,
1901 but there is no simple way of generalizing them. */
1902 static const struct processors all_cores[] =
1904 /* ARM Cores */
1905 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1906 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1907 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1908 #include "arm-cores.def"
1909 #undef ARM_CORE
1910 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
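  /* For illustration only -- the real entries live in arm-cores.def.  A line
     there along the lines of

         ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, flags, cortex_a8)

     would expand, via the ARM_CORE macro above, to the table entry

         {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
          flags | FL_FOR_ARCH7A, &arm_cortex_a8_tune},

     tying the -mcpu= name to its architecture flags and tuning table.  */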
1913 static const struct processors all_architectures[] =
1915 /* ARM Architectures */
1916 /* We don't specify tuning costs here as it will be figured out
1917 from the core. */
1919 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1920 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1921 #include "arm-arches.def"
1922 #undef ARM_ARCH
1923 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1927 /* These are populated as commandline arguments are processed, or NULL
1928 if not specified. */
1929 static const struct processors *arm_selected_arch;
1930 static const struct processors *arm_selected_cpu;
1931 static const struct processors *arm_selected_tune;
1933 /* The name of the preprocessor macro to define for this architecture. */
1935 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1937 /* Available values for -mfpu=. */
1939 static const struct arm_fpu_desc all_fpus[] =
1941 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1942 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1943 #include "arm-fpus.def"
1944 #undef ARM_FPU
1948 /* Supported TLS relocations. */
1950 enum tls_reloc {
1951 TLS_GD32,
1952 TLS_LDM32,
1953 TLS_LDO32,
1954 TLS_IE32,
1955 TLS_LE32,
1956 TLS_DESCSEQ /* GNU scheme */
1959 /* The maximum number of insns to be used when loading a constant. */
1960 inline static int
1961 arm_constant_limit (bool size_p)
1963 return size_p ? 1 : current_tune->constant_limit;
1966 /* Emit an insn that's a simple single-set. Both the operands must be known
1967 to be valid. */
1968 inline static rtx
1969 emit_set_insn (rtx x, rtx y)
1971 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1974 /* Return the number of bits set in VALUE. */
1975 static unsigned
1976 bit_count (unsigned long value)
1978 unsigned long count = 0;
1980 while (value)
1982 count++;
1983 value &= value - 1; /* Clear the least-significant set bit. */
1986 return count;
1989 typedef struct
1991 enum machine_mode mode;
1992 const char *name;
1993 } arm_fixed_mode_set;
1995 /* A small helper for setting fixed-point library libfuncs. */
1997 static void
1998 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1999 const char *funcname, const char *modename,
2000 int num_suffix)
2002 char buffer[50];
2004 if (num_suffix == 0)
2005 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2006 else
2007 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2009 set_optab_libfunc (optable, mode, buffer);
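
  /* Sketch of the names this produces (modes chosen purely for
     illustration): called with ("ssadd", "sa", 3) it registers
     "__gnu_ssaddsa3", and with ("neg", "qq", 2) it registers
     "__gnu_negqq2", following the __gnu_-prefixed naming scheme used for
     the fixed-point helpers.  */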
2012 static void
2013 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2014 enum machine_mode from, const char *funcname,
2015 const char *toname, const char *fromname)
2017 char buffer[50];
2018 const char *maybe_suffix_2 = "";
2020 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2021 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2022 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2023 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2024 maybe_suffix_2 = "2";
2026 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2027 maybe_suffix_2);
2029 set_conv_libfunc (optable, to, from, buffer);
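
  /* Example of the resulting names (an illustrative sketch of the rule
     above): a fract conversion from SQmode ("sq") to DQmode ("dq") -- both
     signed fract modes -- takes the "2" suffix and registers
     "__gnu_fractsqdq2", whereas SImode ("si") to SQmode does not, giving
     "__gnu_fractsisq".  */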
2032 /* Set up library functions unique to ARM. */
2034 static void
2035 arm_init_libfuncs (void)
2037 /* For Linux, we have access to kernel support for atomic operations. */
2038 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2039 init_sync_libfuncs (2 * UNITS_PER_WORD);
2041 /* There are no special library functions unless we are using the
2042 ARM BPABI. */
2043 if (!TARGET_BPABI)
2044 return;
2046 /* The functions below are described in Section 4 of the "Run-Time
2047 ABI for the ARM architecture", Version 1.0. */
2049 /* Double-precision floating-point arithmetic. Table 2. */
2050 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2051 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2052 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2053 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2054 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2056 /* Double-precision comparisons. Table 3. */
2057 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2058 set_optab_libfunc (ne_optab, DFmode, NULL);
2059 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2060 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2061 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2062 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2063 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2065 /* Single-precision floating-point arithmetic. Table 4. */
2066 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2067 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2068 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2069 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2070 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2072 /* Single-precision comparisons. Table 5. */
2073 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2074 set_optab_libfunc (ne_optab, SFmode, NULL);
2075 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2076 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2077 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2078 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2079 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2081 /* Floating-point to integer conversions. Table 6. */
2082 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2083 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2084 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2085 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2086 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2087 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2088 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2089 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2091 /* Conversions between floating types. Table 7. */
2092 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2093 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2095 /* Integer to floating-point conversions. Table 8. */
2096 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2097 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2098 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2099 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2100 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2101 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2102 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2103 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2105 /* Long long. Table 9. */
2106 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2107 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2108 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2109 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2110 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2111 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2112 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2113 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2115 /* Integer (32/32->32) division. \S 4.3.1. */
2116 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2117 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2119 /* The divmod functions are designed so that they can be used for
2120 plain division, even though they return both the quotient and the
2121 remainder. The quotient is returned in the usual location (i.e.,
2122 r0 for SImode, {r0, r1} for DImode), just as would be expected
2123 for an ordinary division routine. Because the AAPCS calling
2124 conventions specify that all of { r0, r1, r2, r3 } are
2125 call-clobbered registers, there is no need to tell the compiler
2126 explicitly that those registers are clobbered by these
2127 routines. */
2128 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2129 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2131 /* For SImode division the ABI provides div-without-mod routines,
2132 which are faster. */
2133 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2134 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2136 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2137 divmod libcalls instead. */
2138 set_optab_libfunc (smod_optab, DImode, NULL);
2139 set_optab_libfunc (umod_optab, DImode, NULL);
2140 set_optab_libfunc (smod_optab, SImode, NULL);
2141 set_optab_libfunc (umod_optab, SImode, NULL);
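
  /* Illustrative effect of the bindings above (not part of the setup
     itself): a plain C remainder such as

         int rem (int a, int b) { return a % b; }

     expands to a single call to __aeabi_idivmod; the quotient comes back
     in r0 and the remainder in r1, so only a register move is needed to
     return the remainder.  */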
2143 /* Half-precision float operations. The compiler handles all operations
2144 with NULL libfuncs by converting to SFmode. */
2145 switch (arm_fp16_format)
2147 case ARM_FP16_FORMAT_IEEE:
2148 case ARM_FP16_FORMAT_ALTERNATIVE:
2150 /* Conversions. */
2151 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2152 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2153 ? "__gnu_f2h_ieee"
2154 : "__gnu_f2h_alternative"));
2155 set_conv_libfunc (sext_optab, SFmode, HFmode,
2156 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2157 ? "__gnu_h2f_ieee"
2158 : "__gnu_h2f_alternative"));
2160 /* Arithmetic. */
2161 set_optab_libfunc (add_optab, HFmode, NULL);
2162 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2163 set_optab_libfunc (smul_optab, HFmode, NULL);
2164 set_optab_libfunc (neg_optab, HFmode, NULL);
2165 set_optab_libfunc (sub_optab, HFmode, NULL);
2167 /* Comparisons. */
2168 set_optab_libfunc (eq_optab, HFmode, NULL);
2169 set_optab_libfunc (ne_optab, HFmode, NULL);
2170 set_optab_libfunc (lt_optab, HFmode, NULL);
2171 set_optab_libfunc (le_optab, HFmode, NULL);
2172 set_optab_libfunc (ge_optab, HFmode, NULL);
2173 set_optab_libfunc (gt_optab, HFmode, NULL);
2174 set_optab_libfunc (unord_optab, HFmode, NULL);
2175 break;
2177 default:
2178 break;
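
  /* A rough sketch of what the NULL entries above imply: for

         __fp16 a, b;  float f = a + b;

     the operands are widened to SFmode (through __gnu_h2f_* when no
     hardware conversion is available), the addition is done in SFmode,
     and a result stored back into an __fp16 object would be narrowed
     again through __gnu_f2h_*.  */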
2181 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2183 const arm_fixed_mode_set fixed_arith_modes[] =
2185 { QQmode, "qq" },
2186 { UQQmode, "uqq" },
2187 { HQmode, "hq" },
2188 { UHQmode, "uhq" },
2189 { SQmode, "sq" },
2190 { USQmode, "usq" },
2191 { DQmode, "dq" },
2192 { UDQmode, "udq" },
2193 { TQmode, "tq" },
2194 { UTQmode, "utq" },
2195 { HAmode, "ha" },
2196 { UHAmode, "uha" },
2197 { SAmode, "sa" },
2198 { USAmode, "usa" },
2199 { DAmode, "da" },
2200 { UDAmode, "uda" },
2201 { TAmode, "ta" },
2202 { UTAmode, "uta" }
2204 const arm_fixed_mode_set fixed_conv_modes[] =
2206 { QQmode, "qq" },
2207 { UQQmode, "uqq" },
2208 { HQmode, "hq" },
2209 { UHQmode, "uhq" },
2210 { SQmode, "sq" },
2211 { USQmode, "usq" },
2212 { DQmode, "dq" },
2213 { UDQmode, "udq" },
2214 { TQmode, "tq" },
2215 { UTQmode, "utq" },
2216 { HAmode, "ha" },
2217 { UHAmode, "uha" },
2218 { SAmode, "sa" },
2219 { USAmode, "usa" },
2220 { DAmode, "da" },
2221 { UDAmode, "uda" },
2222 { TAmode, "ta" },
2223 { UTAmode, "uta" },
2224 { QImode, "qi" },
2225 { HImode, "hi" },
2226 { SImode, "si" },
2227 { DImode, "di" },
2228 { TImode, "ti" },
2229 { SFmode, "sf" },
2230 { DFmode, "df" }
2232 unsigned int i, j;
2234 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2236 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2237 "add", fixed_arith_modes[i].name, 3);
2238 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2239 "ssadd", fixed_arith_modes[i].name, 3);
2240 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2241 "usadd", fixed_arith_modes[i].name, 3);
2242 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2243 "sub", fixed_arith_modes[i].name, 3);
2244 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2245 "sssub", fixed_arith_modes[i].name, 3);
2246 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2247 "ussub", fixed_arith_modes[i].name, 3);
2248 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2249 "mul", fixed_arith_modes[i].name, 3);
2250 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2251 "ssmul", fixed_arith_modes[i].name, 3);
2252 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2253 "usmul", fixed_arith_modes[i].name, 3);
2254 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2255 "div", fixed_arith_modes[i].name, 3);
2256 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2257 "udiv", fixed_arith_modes[i].name, 3);
2258 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2259 "ssdiv", fixed_arith_modes[i].name, 3);
2260 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2261 "usdiv", fixed_arith_modes[i].name, 3);
2262 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2263 "neg", fixed_arith_modes[i].name, 2);
2264 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2265 "ssneg", fixed_arith_modes[i].name, 2);
2266 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2267 "usneg", fixed_arith_modes[i].name, 2);
2268 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2269 "ashl", fixed_arith_modes[i].name, 3);
2270 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2271 "ashr", fixed_arith_modes[i].name, 3);
2272 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2273 "lshr", fixed_arith_modes[i].name, 3);
2274 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2275 "ssashl", fixed_arith_modes[i].name, 3);
2276 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2277 "usashl", fixed_arith_modes[i].name, 3);
2278 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2279 "cmp", fixed_arith_modes[i].name, 2);
2282 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2283 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2285 if (i == j
2286 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2287 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2288 continue;
2290 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2291 fixed_conv_modes[j].mode, "fract",
2292 fixed_conv_modes[i].name,
2293 fixed_conv_modes[j].name);
2294 arm_set_fixed_conv_libfunc (satfract_optab,
2295 fixed_conv_modes[i].mode,
2296 fixed_conv_modes[j].mode, "satfract",
2297 fixed_conv_modes[i].name,
2298 fixed_conv_modes[j].name);
2299 arm_set_fixed_conv_libfunc (fractuns_optab,
2300 fixed_conv_modes[i].mode,
2301 fixed_conv_modes[j].mode, "fractuns",
2302 fixed_conv_modes[i].name,
2303 fixed_conv_modes[j].name);
2304 arm_set_fixed_conv_libfunc (satfractuns_optab,
2305 fixed_conv_modes[i].mode,
2306 fixed_conv_modes[j].mode, "satfractuns",
2307 fixed_conv_modes[i].name,
2308 fixed_conv_modes[j].name);
2312 if (TARGET_AAPCS_BASED)
2313 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2316 /* On AAPCS systems, this is the "struct __va_list". */
2317 static GTY(()) tree va_list_type;
2319 /* Return the type to use as __builtin_va_list. */
2320 static tree
2321 arm_build_builtin_va_list (void)
2323 tree va_list_name;
2324 tree ap_field;
2326 if (!TARGET_AAPCS_BASED)
2327 return std_build_builtin_va_list ();
2329 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2330 defined as:
2332 struct __va_list
2334 void *__ap;
2337 The C Library ABI further reinforces this definition in \S
2338 4.1.
2340 We must follow this definition exactly. The structure tag
2341 name is visible in C++ mangled names, and thus forms a part
2342 of the ABI. The field name may be used by people who
2343 #include <stdarg.h>. */
2344 /* Create the type. */
2345 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2346 /* Give it the required name. */
2347 va_list_name = build_decl (BUILTINS_LOCATION,
2348 TYPE_DECL,
2349 get_identifier ("__va_list"),
2350 va_list_type);
2351 DECL_ARTIFICIAL (va_list_name) = 1;
2352 TYPE_NAME (va_list_type) = va_list_name;
2353 TYPE_STUB_DECL (va_list_type) = va_list_name;
2354 /* Create the __ap field. */
2355 ap_field = build_decl (BUILTINS_LOCATION,
2356 FIELD_DECL,
2357 get_identifier ("__ap"),
2358 ptr_type_node);
2359 DECL_ARTIFICIAL (ap_field) = 1;
2360 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2361 TYPE_FIELDS (va_list_type) = ap_field;
2362 /* Compute its layout. */
2363 layout_type (va_list_type);
2365 return va_list_type;
2368 /* Return an expression of type "void *" pointing to the next
2369 available argument in a variable-argument list. VALIST is the
2370 user-level va_list object, of type __builtin_va_list. */
2371 static tree
2372 arm_extract_valist_ptr (tree valist)
2374 if (TREE_TYPE (valist) == error_mark_node)
2375 return error_mark_node;
2377 /* On an AAPCS target, the pointer is stored within "struct
2378 va_list". */
2379 if (TARGET_AAPCS_BASED)
2381 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2382 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2383 valist, ap_field, NULL_TREE);
2386 return valist;
2389 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2390 static void
2391 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2393 valist = arm_extract_valist_ptr (valist);
2394 std_expand_builtin_va_start (valist, nextarg);
2397 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2398 static tree
2399 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2400 gimple_seq *post_p)
2402 valist = arm_extract_valist_ptr (valist);
2403 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2406 /* Fix up any incompatible options that the user has specified. */
2407 static void
2408 arm_option_override (void)
2410 if (global_options_set.x_arm_arch_option)
2411 arm_selected_arch = &all_architectures[arm_arch_option];
2413 if (global_options_set.x_arm_cpu_option)
2415 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2416 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2419 if (global_options_set.x_arm_tune_option)
2420 arm_selected_tune = &all_cores[(int) arm_tune_option];
2422 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2423 SUBTARGET_OVERRIDE_OPTIONS;
2424 #endif
2426 if (arm_selected_arch)
2428 if (arm_selected_cpu)
2430 /* Check for conflict between mcpu and march. */
2431 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2433 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2434 arm_selected_cpu->name, arm_selected_arch->name);
2435 /* -march wins for code generation.
2436 -mcpu wins for default tuning. */
2437 if (!arm_selected_tune)
2438 arm_selected_tune = arm_selected_cpu;
2440 arm_selected_cpu = arm_selected_arch;
2442 else
2443 /* -mcpu wins. */
2444 arm_selected_arch = NULL;
2446 else
2447 /* Pick a CPU based on the architecture. */
2448 arm_selected_cpu = arm_selected_arch;
2451 /* If the user did not specify a processor, choose one for them. */
2452 if (!arm_selected_cpu)
2454 const struct processors * sel;
2455 unsigned int sought;
2457 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2458 if (!arm_selected_cpu->name)
2460 #ifdef SUBTARGET_CPU_DEFAULT
2461 /* Use the subtarget default CPU if none was specified by
2462 configure. */
2463 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2464 #endif
2465 /* Default to ARM6. */
2466 if (!arm_selected_cpu->name)
2467 arm_selected_cpu = &all_cores[arm6];
2470 sel = arm_selected_cpu;
2471 insn_flags = sel->flags;
2473 /* Now check to see if the user has specified some command line
2474 switches that require certain abilities from the cpu. */
2475 sought = 0;
2477 if (TARGET_INTERWORK || TARGET_THUMB)
2479 sought |= (FL_THUMB | FL_MODE32);
2481 /* There are no ARM processors that support both APCS-26 and
2482 interworking. Therefore we force FL_MODE26 to be removed
2483 from insn_flags here (if it was set), so that the search
2484 below will always be able to find a compatible processor. */
2485 insn_flags &= ~FL_MODE26;
2488 if (sought != 0 && ((sought & insn_flags) != sought))
2490 /* Try to locate a CPU type that supports all of the abilities
2491 of the default CPU, plus the extra abilities requested by
2492 the user. */
2493 for (sel = all_cores; sel->name != NULL; sel++)
2494 if ((sel->flags & sought) == (sought | insn_flags))
2495 break;
2497 if (sel->name == NULL)
2499 unsigned current_bit_count = 0;
2500 const struct processors * best_fit = NULL;
2502 /* Ideally we would like to issue an error message here
2503 saying that it was not possible to find a CPU compatible
2504 with the default CPU, but which also supports the command
2505 line options specified by the programmer, and so they
2506 ought to use the -mcpu=<name> command line option to
2507 override the default CPU type.
2509 If we cannot find a cpu that has both the
2510 characteristics of the default cpu and the given
2511 command line options we scan the array again looking
2512 for a best match. */
2513 for (sel = all_cores; sel->name != NULL; sel++)
2514 if ((sel->flags & sought) == sought)
2516 unsigned count;
2518 count = bit_count (sel->flags & insn_flags);
2520 if (count >= current_bit_count)
2522 best_fit = sel;
2523 current_bit_count = count;
2527 gcc_assert (best_fit);
2528 sel = best_fit;
2531 arm_selected_cpu = sel;
2535 gcc_assert (arm_selected_cpu);
2536 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2537 if (!arm_selected_tune)
2538 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2540 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
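  /* For instance, with an ARMv7-A cpu selected the arch field is "7A" and
     the macro becomes "__ARM_ARCH_7A__".  */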
2541 insn_flags = arm_selected_cpu->flags;
2542 arm_base_arch = arm_selected_cpu->base_arch;
2544 arm_tune = arm_selected_tune->core;
2545 tune_flags = arm_selected_tune->flags;
2546 current_tune = arm_selected_tune->tune;
2548 /* Make sure that the processor choice does not conflict with any of the
2549 other command line choices. */
2550 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2551 error ("target CPU does not support ARM mode");
2553 /* BPABI targets use linker tricks to allow interworking on cores
2554 without thumb support. */
2555 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2557 warning (0, "target CPU does not support interworking" );
2558 target_flags &= ~MASK_INTERWORK;
2561 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2563 warning (0, "target CPU does not support THUMB instructions");
2564 target_flags &= ~MASK_THUMB;
2567 if (TARGET_APCS_FRAME && TARGET_THUMB)
2569 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2570 target_flags &= ~MASK_APCS_FRAME;
2573 /* Callee super interworking implies thumb interworking. Adding
2574 this to the flags here simplifies the logic elsewhere. */
2575 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2576 target_flags |= MASK_INTERWORK;
2578 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2579 from here where no function is being compiled currently. */
2580 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2581 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2583 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2584 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2586 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2588 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2589 target_flags |= MASK_APCS_FRAME;
2592 if (TARGET_POKE_FUNCTION_NAME)
2593 target_flags |= MASK_APCS_FRAME;
2595 if (TARGET_APCS_REENT && flag_pic)
2596 error ("-fpic and -mapcs-reent are incompatible");
2598 if (TARGET_APCS_REENT)
2599 warning (0, "APCS reentrant code not supported. Ignored");
2601 /* If this target is normally configured to use APCS frames, warn if they
2602 are turned off and debugging is turned on. */
2603 if (TARGET_ARM
2604 && write_symbols != NO_DEBUG
2605 && !TARGET_APCS_FRAME
2606 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2607 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2609 if (TARGET_APCS_FLOAT)
2610 warning (0, "passing floating point arguments in fp regs not yet supported");
2612 if (TARGET_LITTLE_WORDS)
2613 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2614 "will be removed in a future release");
2616 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2617 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2618 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2619 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2620 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2621 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2622 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2623 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2624 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2625 arm_arch6m = arm_arch6 && !arm_arch_notm;
2626 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2627 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2628 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2629 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2630 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2632 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2633 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2634 thumb_code = TARGET_ARM == 0;
2635 thumb1_code = TARGET_THUMB1 != 0;
2636 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2637 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2638 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2639 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2640 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2641 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2642 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2643 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2644 if (arm_restrict_it == 2)
2645 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2647 if (!TARGET_THUMB2)
2648 arm_restrict_it = 0;
2650 /* If we are not using the default (ARM mode) section anchor offset
2651 ranges, then set the correct ranges now. */
2652 if (TARGET_THUMB1)
2654 /* Thumb-1 LDR instructions cannot have negative offsets.
2655 Permissible positive offset ranges are 5-bit (for byte loads),
2656 6-bit (for halfword loads), or 7-bit (for word loads).
2657 Empirical results suggest a 7-bit anchor range gives the best
2658 overall code size. */
2659 targetm.min_anchor_offset = 0;
2660 targetm.max_anchor_offset = 127;
2662 else if (TARGET_THUMB2)
2664 /* The minimum is set such that the total size of the block
2665 for a particular anchor is 248 + 1 + 4095 bytes, which is
2666 divisible by eight, ensuring natural spacing of anchors. */
2667 targetm.min_anchor_offset = -248;
2668 targetm.max_anchor_offset = 4095;
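      /* Arithmetic behind the figures used here and above: the Thumb-1
	 word-load range comes from a 5-bit immediate scaled by 4 (byte
	 offsets 0..124, hence the 7-bit limit of 127), and the Thumb-2
	 block size is 248 + 1 + 4095 = 4344 bytes, which is indeed a
	 multiple of eight.  */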
2671 /* V5 code we generate is completely interworking capable, so we turn off
2672 TARGET_INTERWORK here to avoid many tests later on. */
2674 /* XXX However, we must pass the right pre-processor defines to CPP
2675 or GLD can get confused. This is a hack. */
2676 if (TARGET_INTERWORK)
2677 arm_cpp_interwork = 1;
2679 if (arm_arch5)
2680 target_flags &= ~MASK_INTERWORK;
2682 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2683 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2685 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2686 error ("iwmmxt abi requires an iwmmxt capable cpu");
2688 if (!global_options_set.x_arm_fpu_index)
2690 const char *target_fpu_name;
2691 bool ok;
2693 #ifdef FPUTYPE_DEFAULT
2694 target_fpu_name = FPUTYPE_DEFAULT;
2695 #else
2696 target_fpu_name = "vfp";
2697 #endif
2699 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2700 CL_TARGET);
2701 gcc_assert (ok);
2704 arm_fpu_desc = &all_fpus[arm_fpu_index];
2706 switch (arm_fpu_desc->model)
2708 case ARM_FP_MODEL_VFP:
2709 arm_fpu_attr = FPU_VFP;
2710 break;
2712 default:
2713 gcc_unreachable();
2716 if (TARGET_AAPCS_BASED)
2718 if (TARGET_CALLER_INTERWORKING)
2719 error ("AAPCS does not support -mcaller-super-interworking");
2720 else
2721 if (TARGET_CALLEE_INTERWORKING)
2722 error ("AAPCS does not support -mcallee-super-interworking");
2725 /* iWMMXt and NEON are incompatible. */
2726 if (TARGET_IWMMXT && TARGET_NEON)
2727 error ("iWMMXt and NEON are incompatible");
2729 /* iWMMXt unsupported under Thumb mode. */
2730 if (TARGET_THUMB && TARGET_IWMMXT)
2731 error ("iWMMXt unsupported under Thumb mode");
2733 /* __fp16 support currently assumes the core has ldrh. */
2734 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2735 sorry ("__fp16 and no ldrh");
2737 /* If soft-float is specified then don't use FPU. */
2738 if (TARGET_SOFT_FLOAT)
2739 arm_fpu_attr = FPU_NONE;
2741 if (TARGET_AAPCS_BASED)
2743 if (arm_abi == ARM_ABI_IWMMXT)
2744 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2745 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2746 && TARGET_HARD_FLOAT
2747 && TARGET_VFP)
2748 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2749 else
2750 arm_pcs_default = ARM_PCS_AAPCS;
2752 else
2754 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2755 sorry ("-mfloat-abi=hard and VFP");
2757 if (arm_abi == ARM_ABI_APCS)
2758 arm_pcs_default = ARM_PCS_APCS;
2759 else
2760 arm_pcs_default = ARM_PCS_ATPCS;
2763 /* For arm2/3 there is no need to do any scheduling if we are doing
2764 software floating-point. */
2765 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2766 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2768 /* Use the cp15 method if it is available. */
2769 if (target_thread_pointer == TP_AUTO)
2771 if (arm_arch6k && !TARGET_THUMB1)
2772 target_thread_pointer = TP_CP15;
2773 else
2774 target_thread_pointer = TP_SOFT;
2777 if (TARGET_HARD_TP && TARGET_THUMB1)
2778 error ("can not use -mtp=cp15 with 16-bit Thumb");
2780 /* Override the default structure alignment for AAPCS ABI. */
2781 if (!global_options_set.x_arm_structure_size_boundary)
2783 if (TARGET_AAPCS_BASED)
2784 arm_structure_size_boundary = 8;
2786 else
2788 if (arm_structure_size_boundary != 8
2789 && arm_structure_size_boundary != 32
2790 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2792 if (ARM_DOUBLEWORD_ALIGN)
2793 warning (0,
2794 "structure size boundary can only be set to 8, 32 or 64");
2795 else
2796 warning (0, "structure size boundary can only be set to 8 or 32");
2797 arm_structure_size_boundary
2798 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2802 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2804 error ("RTP PIC is incompatible with Thumb");
2805 flag_pic = 0;
2808 /* If stack checking is disabled, we can use r10 as the PIC register,
2809 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2810 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2812 if (TARGET_VXWORKS_RTP)
2813 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2814 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2817 if (flag_pic && TARGET_VXWORKS_RTP)
2818 arm_pic_register = 9;
2820 if (arm_pic_register_string != NULL)
2822 int pic_register = decode_reg_name (arm_pic_register_string);
2824 if (!flag_pic)
2825 warning (0, "-mpic-register= is useless without -fpic");
2827 /* Prevent the user from choosing an obviously stupid PIC register. */
2828 else if (pic_register < 0 || call_used_regs[pic_register]
2829 || pic_register == HARD_FRAME_POINTER_REGNUM
2830 || pic_register == STACK_POINTER_REGNUM
2831 || pic_register >= PC_REGNUM
2832 || (TARGET_VXWORKS_RTP
2833 && (unsigned int) pic_register != arm_pic_register))
2834 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2835 else
2836 arm_pic_register = pic_register;
2839 if (TARGET_VXWORKS_RTP
2840 && !global_options_set.x_arm_pic_data_is_text_relative)
2841 arm_pic_data_is_text_relative = 0;
2843 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2844 if (fix_cm3_ldrd == 2)
2846 if (arm_selected_cpu->core == cortexm3)
2847 fix_cm3_ldrd = 1;
2848 else
2849 fix_cm3_ldrd = 0;
2852 /* Enable -munaligned-access by default for
2853 - all ARMv6 architecture-based processors
2854 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2855 - ARMv8 architecture-based processors.
2857 Disable -munaligned-access by default for
2858 - all pre-ARMv6 architecture-based processors
2859 - ARMv6-M architecture-based processors. */
2861 if (unaligned_access == 2)
2863 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2864 unaligned_access = 1;
2865 else
2866 unaligned_access = 0;
2868 else if (unaligned_access == 1
2869 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2871 warning (0, "target CPU does not support unaligned accesses");
2872 unaligned_access = 0;
2875 if (TARGET_THUMB1 && flag_schedule_insns)
2877 /* Don't warn since it's on by default in -O2. */
2878 flag_schedule_insns = 0;
2881 if (optimize_size)
2883 /* If optimizing for size, bump the number of instructions that we
2884 are prepared to conditionally execute (even on a StrongARM). */
2885 max_insns_skipped = 6;
2887 else
2888 max_insns_skipped = current_tune->max_insns_skipped;
2890 /* Hot/Cold partitioning is not currently supported, since we can't
2891 handle literal pool placement in that case. */
2892 if (flag_reorder_blocks_and_partition)
2894 inform (input_location,
2895 "-freorder-blocks-and-partition not supported on this architecture");
2896 flag_reorder_blocks_and_partition = 0;
2897 flag_reorder_blocks = 1;
2900 if (flag_pic)
2901 /* Hoisting PIC address calculations more aggressively provides a small,
2902 but measurable, size reduction for PIC code. Therefore, we decrease
2903 the bar for unrestricted expression hoisting to the cost of PIC address
2904 calculation, which is 2 instructions. */
2905 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2906 global_options.x_param_values,
2907 global_options_set.x_param_values);
2909 /* ARM EABI defaults to strict volatile bitfields. */
2910 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2911 && abi_version_at_least(2))
2912 flag_strict_volatile_bitfields = 1;
2914 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and for which we
2915 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2916 if (flag_prefetch_loop_arrays < 0
2917 && HAVE_prefetch
2918 && optimize >= 3
2919 && current_tune->num_prefetch_slots > 0)
2920 flag_prefetch_loop_arrays = 1;
2922 /* Set up parameters to be used in prefetching algorithm. Do not override the
2923 defaults unless we are tuning for a core we have researched values for. */
2924 if (current_tune->num_prefetch_slots > 0)
2925 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2926 current_tune->num_prefetch_slots,
2927 global_options.x_param_values,
2928 global_options_set.x_param_values);
2929 if (current_tune->l1_cache_line_size >= 0)
2930 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2931 current_tune->l1_cache_line_size,
2932 global_options.x_param_values,
2933 global_options_set.x_param_values);
2934 if (current_tune->l1_cache_size >= 0)
2935 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2936 current_tune->l1_cache_size,
2937 global_options.x_param_values,
2938 global_options_set.x_param_values);
2940 /* Use Neon to perform 64-bit operations rather than core
2941 registers. */
2942 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2943 if (use_neon_for_64bits == 1)
2944 prefer_neon_for_64bits = true;
2946 /* Use the alternative scheduling-pressure algorithm by default. */
2947 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2948 global_options.x_param_values,
2949 global_options_set.x_param_values);
2951 /* Disable shrink-wrap when optimizing function for size, since it tends to
2952 generate additional returns. */
2953 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2954 flag_shrink_wrap = false;
2955 /* TBD: Dwarf info for apcs frame is not handled yet. */
2956 if (TARGET_APCS_FRAME)
2957 flag_shrink_wrap = false;
2959 /* We only support -mslow-flash-data on armv7-m targets. */
2960 if (target_slow_flash_data
2961 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2962 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2963 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2965 /* Currently, for slow flash data, we just disable literal pools. */
2966 if (target_slow_flash_data)
2967 arm_disable_literal_pool = true;
2969 /* Register global variables with the garbage collector. */
2970 arm_add_gc_roots ();
2973 static void
2974 arm_add_gc_roots (void)
2976 gcc_obstack_init(&minipool_obstack);
2977 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2980 /* A table of known ARM exception types.
2981 For use with the interrupt function attribute. */
2983 typedef struct
2985 const char *const arg;
2986 const unsigned long return_value;
2988 isr_attribute_arg;
2990 static const isr_attribute_arg isr_attribute_args [] =
2992 { "IRQ", ARM_FT_ISR },
2993 { "irq", ARM_FT_ISR },
2994 { "FIQ", ARM_FT_FIQ },
2995 { "fiq", ARM_FT_FIQ },
2996 { "ABORT", ARM_FT_ISR },
2997 { "abort", ARM_FT_ISR },
2998 { "ABORT", ARM_FT_ISR },
2999 { "abort", ARM_FT_ISR },
3000 { "UNDEF", ARM_FT_EXCEPTION },
3001 { "undef", ARM_FT_EXCEPTION },
3002 { "SWI", ARM_FT_EXCEPTION },
3003 { "swi", ARM_FT_EXCEPTION },
3004 { NULL, ARM_FT_NORMAL }
3007 /* Returns the (interrupt) function type of the current
3008 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
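/* For example, a handler declared as

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   maps to ARM_FT_ISR via the table above; an unrecognised string yields
   ARM_FT_UNKNOWN.  */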
3010 static unsigned long
3011 arm_isr_value (tree argument)
3013 const isr_attribute_arg * ptr;
3014 const char * arg;
3016 if (!arm_arch_notm)
3017 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3019 /* No argument - default to IRQ. */
3020 if (argument == NULL_TREE)
3021 return ARM_FT_ISR;
3023 /* Get the value of the argument. */
3024 if (TREE_VALUE (argument) == NULL_TREE
3025 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3026 return ARM_FT_UNKNOWN;
3028 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3030 /* Check it against the list of known arguments. */
3031 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3032 if (streq (arg, ptr->arg))
3033 return ptr->return_value;
3035 /* An unrecognized interrupt type. */
3036 return ARM_FT_UNKNOWN;
3039 /* Computes the type of the current function. */
3041 static unsigned long
3042 arm_compute_func_type (void)
3044 unsigned long type = ARM_FT_UNKNOWN;
3045 tree a;
3046 tree attr;
3048 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3050 /* Decide if the current function is volatile. Such functions
3051 never return, and many memory cycles can be saved by not storing
3052 register values that will never be needed again. This optimization
3053 was added to speed up context switching in a kernel application. */
3054 if (optimize > 0
3055 && (TREE_NOTHROW (current_function_decl)
3056 || !(flag_unwind_tables
3057 || (flag_exceptions
3058 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3059 && TREE_THIS_VOLATILE (current_function_decl))
3060 type |= ARM_FT_VOLATILE;
3062 if (cfun->static_chain_decl != NULL)
3063 type |= ARM_FT_NESTED;
3065 attr = DECL_ATTRIBUTES (current_function_decl);
3067 a = lookup_attribute ("naked", attr);
3068 if (a != NULL_TREE)
3069 type |= ARM_FT_NAKED;
3071 a = lookup_attribute ("isr", attr);
3072 if (a == NULL_TREE)
3073 a = lookup_attribute ("interrupt", attr);
3075 if (a == NULL_TREE)
3076 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3077 else
3078 type |= arm_isr_value (TREE_VALUE (a));
3080 return type;
3083 /* Returns the type of the current function. */
3085 unsigned long
3086 arm_current_func_type (void)
3088 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3089 cfun->machine->func_type = arm_compute_func_type ();
3091 return cfun->machine->func_type;
3094 bool
3095 arm_allocate_stack_slots_for_args (void)
3097 /* Naked functions should not allocate stack slots for arguments. */
3098 return !IS_NAKED (arm_current_func_type ());
3101 static bool
3102 arm_warn_func_return (tree decl)
3104 /* Naked functions are implemented entirely in assembly, including the
3105 return sequence, so suppress warnings about this. */
3106 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3110 /* Output assembler code for a block containing the constant parts
3111 of a trampoline, leaving space for the variable parts.
3113 On the ARM, (if r8 is the static chain regnum, and remembering that
3114 referencing pc adds an offset of 8) the trampoline looks like:
3115 ldr r8, [pc, #0]
3116 ldr pc, [pc]
3117 .word static chain value
3118 .word function's address
3119 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
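/* A note on the 16-bit Thumb variant emitted below: Thumb-1 has no way to
   load pc directly from memory outside of POP, so the stub saves {r0, r1},
   loads the static chain and the target address with pc-relative LDRs,
   overwrites the saved r1 slot on the stack with the target address, and
   finishes with pop {r0, pc}.  */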
3121 static void
3122 arm_asm_trampoline_template (FILE *f)
3124 if (TARGET_ARM)
3126 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3127 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3129 else if (TARGET_THUMB2)
3131 /* The Thumb-2 trampoline is similar to the arm implementation.
3132 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3133 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3134 STATIC_CHAIN_REGNUM, PC_REGNUM);
3135 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3137 else
3139 ASM_OUTPUT_ALIGN (f, 2);
3140 fprintf (f, "\t.code\t16\n");
3141 fprintf (f, ".Ltrampoline_start:\n");
3142 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3143 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3144 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3145 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3146 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3147 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3149 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3150 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3153 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3155 static void
3156 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3158 rtx fnaddr, mem, a_tramp;
3160 emit_block_move (m_tramp, assemble_trampoline_template (),
3161 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3163 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3164 emit_move_insn (mem, chain_value);
3166 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3167 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3168 emit_move_insn (mem, fnaddr);
3170 a_tramp = XEXP (m_tramp, 0);
3171 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3172 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3173 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3176 /* Thumb trampolines should be entered in thumb mode, so set
3177 the bottom bit of the address. */
3179 static rtx
3180 arm_trampoline_adjust_address (rtx addr)
3182 if (TARGET_THUMB)
3183 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3184 NULL, 0, OPTAB_LIB_WIDEN);
3185 return addr;
3188 /* Return 1 if it is possible to return using a single instruction.
3189 If SIBLING is non-null, this is a test for a return before a sibling
3190 call. SIBLING is the call insn, so we can examine its register usage. */
3193 use_return_insn (int iscond, rtx sibling)
3195 int regno;
3196 unsigned int func_type;
3197 unsigned long saved_int_regs;
3198 unsigned HOST_WIDE_INT stack_adjust;
3199 arm_stack_offsets *offsets;
3201 /* Never use a return instruction before reload has run. */
3202 if (!reload_completed)
3203 return 0;
3205 func_type = arm_current_func_type ();
3207 /* Naked, volatile and stack alignment functions need special
3208 consideration. */
3209 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3210 return 0;
3212 /* So do interrupt functions that use the frame pointer and Thumb
3213 interrupt functions. */
3214 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3215 return 0;
3217 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3218 && !optimize_function_for_size_p (cfun))
3219 return 0;
3221 offsets = arm_get_frame_offsets ();
3222 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3224 /* As do variadic functions. */
3225 if (crtl->args.pretend_args_size
3226 || cfun->machine->uses_anonymous_args
3227 /* Or if the function calls __builtin_eh_return () */
3228 || crtl->calls_eh_return
3229 /* Or if the function calls alloca */
3230 || cfun->calls_alloca
3231 /* Or if there is a stack adjustment. However, if the stack pointer
3232 is saved on the stack, we can use a pre-incrementing stack load. */
3233 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3234 && stack_adjust == 4)))
3235 return 0;
3237 saved_int_regs = offsets->saved_regs_mask;
3239 /* Unfortunately, the insn
3241 ldmib sp, {..., sp, ...}
3243 triggers a bug on most SA-110 based devices, such that the stack
3244 pointer won't be correctly restored if the instruction takes a
3245 page fault. We work around this problem by popping r3 along with
3246 the other registers, since that is never slower than executing
3247 another instruction.
3249 We test for !arm_arch5 here, because code for any architecture
3250 less than this could potentially be run on one of the buggy
3251 chips. */
3252 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3254 /* Validate that r3 is a call-clobbered register (always true in
3255 the default abi) ... */
3256 if (!call_used_regs[3])
3257 return 0;
3259 /* ... that it isn't being used for a return value ... */
3260 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3261 return 0;
3263 /* ... or for a tail-call argument ... */
3264 if (sibling)
3266 gcc_assert (CALL_P (sibling));
3268 if (find_regno_fusage (sibling, USE, 3))
3269 return 0;
3272 /* ... and that there are no call-saved registers in r0-r2
3273 (always true in the default ABI). */
3274 if (saved_int_regs & 0x7)
3275 return 0;
3278 /* Can't be done if interworking with Thumb, and any registers have been
3279 stacked. */
3280 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3281 return 0;
3283 /* On StrongARM, conditional returns are expensive if they aren't
3284 taken and multiple registers have been stacked. */
3285 if (iscond && arm_tune_strongarm)
3287 /* Conditional return when just the LR is stored is a simple
3288 conditional-load instruction, that's not expensive. */
3289 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3290 return 0;
3292 if (flag_pic
3293 && arm_pic_register != INVALID_REGNUM
3294 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3295 return 0;
3298 /* If there are saved registers but the LR isn't saved, then we need
3299 two instructions for the return. */
3300 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3301 return 0;
3303 /* Can't be done if any of the VFP regs are pushed,
3304 since this also requires an insn. */
3305 if (TARGET_HARD_FLOAT && TARGET_VFP)
3306 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3307 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3308 return 0;
3310 if (TARGET_REALLY_IWMMXT)
3311 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3312 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3313 return 0;
3315 return 1;
3318 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3319 shrink-wrapping if possible. This is the case if we need to emit a
3320 prologue, which we can test by looking at the offsets. */
3321 bool
3322 use_simple_return_p (void)
3324 arm_stack_offsets *offsets;
3326 offsets = arm_get_frame_offsets ();
3327 return offsets->outgoing_args != 0;
3330 /* Return TRUE if int I is a valid immediate ARM constant. */
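/* Worked examples for the checks below: 0x0000ff00 and 0xff000000 are
   valid ARM immediates (an 8-bit value rotated by an even amount);
   0x00ff00ff and 0x01010101 are only valid in Thumb-2, via the replicated
   byte patterns; 0x00000101 is not representable in either encoding.  */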
3333 const_ok_for_arm (HOST_WIDE_INT i)
3335 int lowbit;
3337 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3338 be all zero, or all one. */
3339 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3340 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3341 != ((~(unsigned HOST_WIDE_INT) 0)
3342 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3343 return FALSE;
3345 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3347 /* Fast return for 0 and small values. We must do this for zero, since
3348 the code below can't handle that one case. */
3349 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3350 return TRUE;
3352 /* Get the number of trailing zeros. */
3353 lowbit = ffs((int) i) - 1;
3355 /* Only even shifts are allowed in ARM mode so round down to the
3356 nearest even number. */
3357 if (TARGET_ARM)
3358 lowbit &= ~1;
3360 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3361 return TRUE;
3363 if (TARGET_ARM)
3365 /* Allow rotated constants in ARM mode. */
3366 if (lowbit <= 4
3367 && ((i & ~0xc000003f) == 0
3368 || (i & ~0xf000000f) == 0
3369 || (i & ~0xfc000003) == 0))
3370 return TRUE;
3372 else
3374 HOST_WIDE_INT v;
3376 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3377 v = i & 0xff;
3378 v |= v << 16;
3379 if (i == v || i == (v | (v << 8)))
3380 return TRUE;
3382 /* Allow repeated pattern 0xXY00XY00. */
3383 v = i & 0xff00;
3384 v |= v << 16;
3385 if (i == v)
3386 return TRUE;
3389 return FALSE;
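 /* Illustrative examples (a sketch, assuming TARGET_ARM unless noted):

      const_ok_for_arm (0x000000ff)  -> TRUE   (plain 8-bit immediate)
      const_ok_for_arm (0x00ff0000)  -> TRUE   (0xff rotated up to bits 16..23)
      const_ok_for_arm (0x00000101)  -> FALSE  (set bits span more than 8 bits)

    In Thumb-2 the replicated forms also succeed, e.g. 0x00120012 and
    0x34343434, which ARM mode rejects.  */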
3392 /* Return true if I is a valid constant for the operation CODE. */
3394 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3396 if (const_ok_for_arm (i))
3397 return 1;
3399 switch (code)
3401 case SET:
3402 /* See if we can use movw. */
3403 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3404 return 1;
3405 else
3406 /* Otherwise, try mvn. */
3407 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3409 case PLUS:
3410 /* See if we can use addw or subw. */
3411 if (TARGET_THUMB2
3412 && ((i & 0xfffff000) == 0
3413 || ((-i) & 0xfffff000) == 0))
3414 return 1;
3415 /* else fall through. */
3417 case COMPARE:
3418 case EQ:
3419 case NE:
3420 case GT:
3421 case LE:
3422 case LT:
3423 case GE:
3424 case GEU:
3425 case LTU:
3426 case GTU:
3427 case LEU:
3428 case UNORDERED:
3429 case ORDERED:
3430 case UNEQ:
3431 case UNGE:
3432 case UNLT:
3433 case UNGT:
3434 case UNLE:
3435 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3437 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3438 case XOR:
3439 return 0;
3441 case IOR:
3442 if (TARGET_THUMB2)
3443 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3444 return 0;
3446 case AND:
3447 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3449 default:
3450 gcc_unreachable ();
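 /* Illustrative examples: const_ok_for_op (0xffffff00, AND) is true even
    though 0xffffff00 is not itself a valid immediate, because its
    complement 0xff is (the operation can be emitted as a BIC).  Likewise
    const_ok_for_op (-1020, PLUS) is true because the negated value 1020
    (0xff << 2) is a valid immediate, so the ADD can become a SUB.  */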
3454 /* Return true if I is a valid di mode constant for the operation CODE. */
3456 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3458 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3459 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3460 rtx hi = GEN_INT (hi_val);
3461 rtx lo = GEN_INT (lo_val);
3463 if (TARGET_THUMB1)
3464 return 0;
3466 switch (code)
3468 case AND:
3469 case IOR:
3470 case XOR:
3471 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3472 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3473 case PLUS:
3474 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3476 default:
3477 return 0;
3481 /* Emit a sequence of insns to handle a large constant.
3482 CODE is the code of the operation required, it can be any of SET, PLUS,
3483 IOR, AND, XOR, MINUS;
3484 MODE is the mode in which the operation is being performed;
3485 VAL is the integer to operate on;
3486 SOURCE is the other operand (a register, or a null-pointer for SET);
3487 SUBTARGETS means it is safe to create scratch registers if that will
3488 either produce a simpler sequence, or we will want to cse the values.
3489 Return value is the number of insns emitted. */
3491 /* ??? Tweak this for thumb2. */
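 /* Usage sketch (illustrative only; INSN and the register rtxes are
    placeholders):

      arm_split_constant (PLUS, SImode, insn, 0x12340000, r0, r1, 1);

    for "r0 = r1 + 0x12340000" typically emits two ADDs of the rotated
    immediates 0x12000000 and 0x00340000 and returns 2.  */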
3493 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3494 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3496 rtx cond;
3498 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3499 cond = COND_EXEC_TEST (PATTERN (insn));
3500 else
3501 cond = NULL_RTX;
3503 if (subtargets || code == SET
3504 || (REG_P (target) && REG_P (source)
3505 && REGNO (target) != REGNO (source)))
3507 /* After arm_reorg has been called, we can't fix up expensive
3508 constants by pushing them into memory so we must synthesize
3509 them in-line, regardless of the cost. This is only likely to
3510 be more costly on chips that have load delay slots and we are
3511 compiling without running the scheduler (so no splitting
3512 occurred before the final instruction emission).
3514 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3516 if (!cfun->machine->after_arm_reorg
3517 && !cond
3518 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3519 1, 0)
3520 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3521 + (code != SET))))
3523 if (code == SET)
3525 /* Currently SET is the only monadic value for CODE; all
3526 the rest are dyadic. */
3527 if (TARGET_USE_MOVT)
3528 arm_emit_movpair (target, GEN_INT (val));
3529 else
3530 emit_set_insn (target, GEN_INT (val));
3532 return 1;
3534 else
3536 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3538 if (TARGET_USE_MOVT)
3539 arm_emit_movpair (temp, GEN_INT (val));
3540 else
3541 emit_set_insn (temp, GEN_INT (val));
3543 /* For MINUS, the value is subtracted from, since we never
3544 have subtraction of a constant. */
3545 if (code == MINUS)
3546 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3547 else
3548 emit_set_insn (target,
3549 gen_rtx_fmt_ee (code, mode, source, temp));
3550 return 2;
3555 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3559 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3560 ARM/THUMB2 immediates, and add up to VAL.
3561 The function return value gives the number of insns required. */
3562 static int
3563 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3564 struct four_ints *return_sequence)
3566 int best_consecutive_zeros = 0;
3567 int i;
3568 int best_start = 0;
3569 int insns1, insns2;
3570 struct four_ints tmp_sequence;
3572 /* If we aren't targeting ARM, the best place to start is always at
3573 the bottom; otherwise look more closely. */
3574 if (TARGET_ARM)
3576 for (i = 0; i < 32; i += 2)
3578 int consecutive_zeros = 0;
3580 if (!(val & (3 << i)))
3582 while ((i < 32) && !(val & (3 << i)))
3584 consecutive_zeros += 2;
3585 i += 2;
3587 if (consecutive_zeros > best_consecutive_zeros)
3589 best_consecutive_zeros = consecutive_zeros;
3590 best_start = i - consecutive_zeros;
3592 i -= 2;
3597 /* So long as it won't require any more insns to do so, it's
3598 desirable to emit a small constant (in bits 0...9) in the last
3599 insn. This way there is more chance that it can be combined with
3600 a later addressing insn to form a pre-indexed load or store
3601 operation. Consider:
3603 *((volatile int *)0xe0000100) = 1;
3604 *((volatile int *)0xe0000110) = 2;
3606 We want this to wind up as:
3608 mov rA, #0xe0000000
3609 mov rB, #1
3610 str rB, [rA, #0x100]
3611 mov rB, #2
3612 str rB, [rA, #0x110]
3614 rather than having to synthesize both large constants from scratch.
3616 Therefore, we calculate how many insns would be required to emit
3617 the constant starting from `best_start', and also starting from
3618 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3619 yield a shorter sequence, we may as well use zero. */
3620 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3621 if (best_start != 0
3622 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3624 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3625 if (insns2 <= insns1)
3627 *return_sequence = tmp_sequence;
3628 insns1 = insns2;
3632 return insns1;
3635 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3636 static int
3637 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3638 struct four_ints *return_sequence, int i)
3640 int remainder = val & 0xffffffff;
3641 int insns = 0;
3643 /* Try and find a way of doing the job in either two or three
3644 instructions.
3646 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3647 location. We start at position I. This may be the MSB, or
3648 optimal_immediate_sequence may have positioned it at the largest block
3649 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3650 wrapping around to the top of the word when we drop off the bottom.
3651 In the worst case this code should produce no more than four insns.
3653 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3654 constants, shifted to any arbitrary location. We should always start
3655 at the MSB. */
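 /* Worked example (illustrative): in ARM mode, VAL = 0x12340000 with
    I = 0 (i.e. effectively starting from the MSB) peels off the rotated
    immediates 0x12000000 and then 0x00340000, so RETURN_SEQUENCE gets two
    entries and the function returns 2.  In Thumb-2 mode, VAL = 0x12341234
    is instead split by the replicated-constant tests below into
    0x12001200 and 0x00340034.  */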
3658 int end;
3659 unsigned int b1, b2, b3, b4;
3660 unsigned HOST_WIDE_INT result;
3661 int loc;
3663 gcc_assert (insns < 4);
3665 if (i <= 0)
3666 i += 32;
3668 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3669 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3671 loc = i;
3672 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3673 /* We can use addw/subw for the last 12 bits. */
3674 result = remainder;
3675 else
3677 /* Use an 8-bit shifted/rotated immediate. */
3678 end = i - 8;
3679 if (end < 0)
3680 end += 32;
3681 result = remainder & ((0x0ff << end)
3682 | ((i < end) ? (0xff >> (32 - end))
3683 : 0));
3684 i -= 8;
3687 else
3689 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3690 arbitrary shifts. */
3691 i -= TARGET_ARM ? 2 : 1;
3692 continue;
3695 /* Next, see if we can do a better job with a thumb2 replicated
3696 constant.
3698 We do it this way around to catch the cases like 0x01F001E0 where
3699 two 8-bit immediates would work, but a replicated constant would
3700 make it worse.
3702 TODO: 16-bit constants that don't clear all the bits, but still win.
3703 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3704 if (TARGET_THUMB2)
3706 b1 = (remainder & 0xff000000) >> 24;
3707 b2 = (remainder & 0x00ff0000) >> 16;
3708 b3 = (remainder & 0x0000ff00) >> 8;
3709 b4 = remainder & 0xff;
3711 if (loc > 24)
3713 /* The 8-bit immediate already found clears b1 (and maybe b2),
3714 but must leave b3 and b4 alone. */
3716 /* First try to find a 32-bit replicated constant that clears
3717 almost everything. We can assume that we can't do it in one,
3718 or else we wouldn't be here. */
3719 unsigned int tmp = b1 & b2 & b3 & b4;
3720 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3721 + (tmp << 24);
3722 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3723 + (tmp == b3) + (tmp == b4);
3724 if (tmp
3725 && (matching_bytes >= 3
3726 || (matching_bytes == 2
3727 && const_ok_for_op (remainder & ~tmp2, code))))
3729 /* At least 3 of the bytes match, and the fourth has at
3730 least as many bits set, or two of the bytes match
3731 and it will only require one more insn to finish. */
3732 result = tmp2;
3733 i = tmp != b1 ? 32
3734 : tmp != b2 ? 24
3735 : tmp != b3 ? 16
3736 : 8;
3739 /* Second, try to find a 16-bit replicated constant that can
3740 leave three of the bytes clear. If b2 or b4 is already
3741 zero, then we can. If the 8-bit from above would not
3742 clear b2 anyway, then we still win. */
3743 else if (b1 == b3 && (!b2 || !b4
3744 || (remainder & 0x00ff0000 & ~result)))
3746 result = remainder & 0xff00ff00;
3747 i = 24;
3750 else if (loc > 16)
3752 /* The 8-bit immediate already found clears b2 (and maybe b3)
3753 and we don't get here unless b1 is already clear, but it will
3754 leave b4 unchanged. */
3756 /* If we can clear b2 and b4 at once, then we win, since the
3757 8-bits couldn't possibly reach that far. */
3758 if (b2 == b4)
3760 result = remainder & 0x00ff00ff;
3761 i = 16;
3766 return_sequence->i[insns++] = result;
3767 remainder &= ~result;
3769 if (code == SET || code == MINUS)
3770 code = PLUS;
3772 while (remainder);
3774 return insns;
3777 /* Emit an instruction with the indicated PATTERN. If COND is
3778 non-NULL, conditionalize the execution of the instruction on COND
3779 being true. */
3781 static void
3782 emit_constant_insn (rtx cond, rtx pattern)
3784 if (cond)
3785 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3786 emit_insn (pattern);
3789 /* As above, but extra parameter GENERATE which, if clear, suppresses
3790 RTL generation. */
3792 static int
3793 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3794 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3795 int generate)
3797 int can_invert = 0;
3798 int can_negate = 0;
3799 int final_invert = 0;
3800 int i;
3801 int set_sign_bit_copies = 0;
3802 int clear_sign_bit_copies = 0;
3803 int clear_zero_bit_copies = 0;
3804 int set_zero_bit_copies = 0;
3805 int insns = 0, neg_insns, inv_insns;
3806 unsigned HOST_WIDE_INT temp1, temp2;
3807 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3808 struct four_ints *immediates;
3809 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3811 /* Find out which operations are safe for a given CODE. Also do a quick
3812 check for degenerate cases; these can occur when DImode operations
3813 are split. */
3814 switch (code)
3816 case SET:
3817 can_invert = 1;
3818 break;
3820 case PLUS:
3821 can_negate = 1;
3822 break;
3824 case IOR:
3825 if (remainder == 0xffffffff)
3827 if (generate)
3828 emit_constant_insn (cond,
3829 gen_rtx_SET (VOIDmode, target,
3830 GEN_INT (ARM_SIGN_EXTEND (val))));
3831 return 1;
3834 if (remainder == 0)
3836 if (reload_completed && rtx_equal_p (target, source))
3837 return 0;
3839 if (generate)
3840 emit_constant_insn (cond,
3841 gen_rtx_SET (VOIDmode, target, source));
3842 return 1;
3844 break;
3846 case AND:
3847 if (remainder == 0)
3849 if (generate)
3850 emit_constant_insn (cond,
3851 gen_rtx_SET (VOIDmode, target, const0_rtx));
3852 return 1;
3854 if (remainder == 0xffffffff)
3856 if (reload_completed && rtx_equal_p (target, source))
3857 return 0;
3858 if (generate)
3859 emit_constant_insn (cond,
3860 gen_rtx_SET (VOIDmode, target, source));
3861 return 1;
3863 can_invert = 1;
3864 break;
3866 case XOR:
3867 if (remainder == 0)
3869 if (reload_completed && rtx_equal_p (target, source))
3870 return 0;
3871 if (generate)
3872 emit_constant_insn (cond,
3873 gen_rtx_SET (VOIDmode, target, source));
3874 return 1;
3877 if (remainder == 0xffffffff)
3879 if (generate)
3880 emit_constant_insn (cond,
3881 gen_rtx_SET (VOIDmode, target,
3882 gen_rtx_NOT (mode, source)));
3883 return 1;
3885 final_invert = 1;
3886 break;
3888 case MINUS:
3889 /* We treat MINUS as (val - source), since (source - val) is always
3890 passed as (source + (-val)). */
3891 if (remainder == 0)
3893 if (generate)
3894 emit_constant_insn (cond,
3895 gen_rtx_SET (VOIDmode, target,
3896 gen_rtx_NEG (mode, source)));
3897 return 1;
3899 if (const_ok_for_arm (val))
3901 if (generate)
3902 emit_constant_insn (cond,
3903 gen_rtx_SET (VOIDmode, target,
3904 gen_rtx_MINUS (mode, GEN_INT (val),
3905 source)));
3906 return 1;
3909 break;
3911 default:
3912 gcc_unreachable ();
3915 /* If we can do it in one insn get out quickly. */
3916 if (const_ok_for_op (val, code))
3918 if (generate)
3919 emit_constant_insn (cond,
3920 gen_rtx_SET (VOIDmode, target,
3921 (source
3922 ? gen_rtx_fmt_ee (code, mode, source,
3923 GEN_INT (val))
3924 : GEN_INT (val))));
3925 return 1;
3928 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3929 insn. */
3930 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3931 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3933 if (generate)
3935 if (mode == SImode && i == 16)
3936 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3937 smaller insn. */
3938 emit_constant_insn (cond,
3939 gen_zero_extendhisi2
3940 (target, gen_lowpart (HImode, source)));
3941 else
3942 /* Extz only supports SImode, but we can coerce the operands
3943 into that mode. */
3944 emit_constant_insn (cond,
3945 gen_extzv_t2 (gen_lowpart (SImode, target),
3946 gen_lowpart (SImode, source),
3947 GEN_INT (i), const0_rtx));
3950 return 1;
3953 /* Calculate a few attributes that may be useful for specific
3954 optimizations. */
3955 /* Count number of leading zeros. */
3956 for (i = 31; i >= 0; i--)
3958 if ((remainder & (1 << i)) == 0)
3959 clear_sign_bit_copies++;
3960 else
3961 break;
3964 /* Count number of leading 1's. */
3965 for (i = 31; i >= 0; i--)
3967 if ((remainder & (1 << i)) != 0)
3968 set_sign_bit_copies++;
3969 else
3970 break;
3973 /* Count number of trailing zeros. */
3974 for (i = 0; i <= 31; i++)
3976 if ((remainder & (1 << i)) == 0)
3977 clear_zero_bit_copies++;
3978 else
3979 break;
3982 /* Count number of trailing 1's. */
3983 for (i = 0; i <= 31; i++)
3985 if ((remainder & (1 << i)) != 0)
3986 set_zero_bit_copies++;
3987 else
3988 break;
3991 switch (code)
3993 case SET:
3994 /* See if we can do this by sign_extending a constant that is known
3995 to be negative. This is a good way of doing it, since the shift
3996 may well merge into a subsequent insn. */
3997 if (set_sign_bit_copies > 1)
3999 if (const_ok_for_arm
4000 (temp1 = ARM_SIGN_EXTEND (remainder
4001 << (set_sign_bit_copies - 1))))
4003 if (generate)
4005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, new_src,
4008 GEN_INT (temp1)));
4009 emit_constant_insn (cond,
4010 gen_ashrsi3 (target, new_src,
4011 GEN_INT (set_sign_bit_copies - 1)));
4013 return 2;
4015 /* For an inverted constant, we will need to set the low bits;
4016 these will be shifted out of harm's way. */
4017 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4018 if (const_ok_for_arm (~temp1))
4020 if (generate)
4022 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4023 emit_constant_insn (cond,
4024 gen_rtx_SET (VOIDmode, new_src,
4025 GEN_INT (temp1)));
4026 emit_constant_insn (cond,
4027 gen_ashrsi3 (target, new_src,
4028 GEN_INT (set_sign_bit_copies - 1)));
4030 return 2;
4034 /* See if we can calculate the value as the difference between two
4035 valid immediates. */
4036 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4038 int topshift = clear_sign_bit_copies & ~1;
4040 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4041 & (0xff000000 >> topshift));
4043 /* If temp1 is zero, then that means the 9 most significant
4044 bits of remainder were 1 and we've caused it to overflow.
4045 When topshift is 0 we don't need to do anything since we
4046 can borrow from 'bit 32'. */
4047 if (temp1 == 0 && topshift != 0)
4048 temp1 = 0x80000000 >> (topshift - 1);
4050 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4052 if (const_ok_for_arm (temp2))
4054 if (generate)
4056 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4057 emit_constant_insn (cond,
4058 gen_rtx_SET (VOIDmode, new_src,
4059 GEN_INT (temp1)));
4060 emit_constant_insn (cond,
4061 gen_addsi3 (target, new_src,
4062 GEN_INT (-temp2)));
4065 return 2;
4069 /* See if we can generate this by setting the bottom (or the top)
4070 16 bits, and then shifting these into the other half of the
4071 word. We only look for the simplest cases, to do more would cost
4072 too much. Be careful, however, not to generate this when the
4073 alternative would take fewer insns. */
4074 if (val & 0xffff0000)
4076 temp1 = remainder & 0xffff0000;
4077 temp2 = remainder & 0x0000ffff;
4079 /* Overlaps outside this range are best done using other methods. */
4080 for (i = 9; i < 24; i++)
4082 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4083 && !const_ok_for_arm (temp2))
4085 rtx new_src = (subtargets
4086 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4087 : target);
4088 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4089 source, subtargets, generate);
4090 source = new_src;
4091 if (generate)
4092 emit_constant_insn
4093 (cond,
4094 gen_rtx_SET
4095 (VOIDmode, target,
4096 gen_rtx_IOR (mode,
4097 gen_rtx_ASHIFT (mode, source,
4098 GEN_INT (i)),
4099 source)));
4100 return insns + 1;
4104 /* Don't duplicate cases already considered. */
4105 for (i = 17; i < 24; i++)
4107 if (((temp1 | (temp1 >> i)) == remainder)
4108 && !const_ok_for_arm (temp1))
4110 rtx new_src = (subtargets
4111 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4112 : target);
4113 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4114 source, subtargets, generate);
4115 source = new_src;
4116 if (generate)
4117 emit_constant_insn
4118 (cond,
4119 gen_rtx_SET (VOIDmode, target,
4120 gen_rtx_IOR
4121 (mode,
4122 gen_rtx_LSHIFTRT (mode, source,
4123 GEN_INT (i)),
4124 source)));
4125 return insns + 1;
4129 break;
4131 case IOR:
4132 case XOR:
4133 /* If we have IOR or XOR, and the constant can be loaded in a
4134 single instruction, and we can find a temporary to put it in,
4135 then this can be done in two instructions instead of 3-4. */
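 /* For instance (illustrative only), "x |= 0xffffff01" can load
    0xffffff01 into a scratch with a single MVN of 0xfe and then ORR it
    in: two insns in total, rather than splitting the constant itself.  */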
4136 if (subtargets
4137 /* TARGET can't be NULL if SUBTARGETS is 0 */
4138 || (reload_completed && !reg_mentioned_p (target, source)))
4140 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4142 if (generate)
4144 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4146 emit_constant_insn (cond,
4147 gen_rtx_SET (VOIDmode, sub,
4148 GEN_INT (val)));
4149 emit_constant_insn (cond,
4150 gen_rtx_SET (VOIDmode, target,
4151 gen_rtx_fmt_ee (code, mode,
4152 source, sub)));
4154 return 2;
4158 if (code == XOR)
4159 break;
4161 /* Convert.
4162 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4163 and the remainder 0s, e.g. 0xfff00000)
4164 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4166 This can be done in 2 instructions by using shifts with mov or mvn.
4167 e.g. for
4168 x = x | 0xfff00000;
4169 we generate.
4170 mvn r0, r0, asl #12
4171 mvn r0, r0, lsr #12 */
4172 if (set_sign_bit_copies > 8
4173 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4175 if (generate)
4177 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4178 rtx shift = GEN_INT (set_sign_bit_copies);
4180 emit_constant_insn
4181 (cond,
4182 gen_rtx_SET (VOIDmode, sub,
4183 gen_rtx_NOT (mode,
4184 gen_rtx_ASHIFT (mode,
4185 source,
4186 shift))));
4187 emit_constant_insn
4188 (cond,
4189 gen_rtx_SET (VOIDmode, target,
4190 gen_rtx_NOT (mode,
4191 gen_rtx_LSHIFTRT (mode, sub,
4192 shift))));
4194 return 2;
4197 /* Convert
4198 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4200 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4202 For example, r0 = r0 | 0xfff
4203 mvn r0, r0, lsr #12
4204 mvn r0, r0, asl #12
4207 if (set_zero_bit_copies > 8
4208 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4210 if (generate)
4212 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4213 rtx shift = GEN_INT (set_zero_bit_copies);
4215 emit_constant_insn
4216 (cond,
4217 gen_rtx_SET (VOIDmode, sub,
4218 gen_rtx_NOT (mode,
4219 gen_rtx_LSHIFTRT (mode,
4220 source,
4221 shift))));
4222 emit_constant_insn
4223 (cond,
4224 gen_rtx_SET (VOIDmode, target,
4225 gen_rtx_NOT (mode,
4226 gen_rtx_ASHIFT (mode, sub,
4227 shift))));
4229 return 2;
4232 /* This will never be reached for Thumb2 because orn is a valid
4233 instruction. This is for Thumb1 and the ARM 32 bit cases.
4235 x = y | constant (such that ~constant is a valid constant)
4236 Transform this to
4237 x = ~(~y & ~constant).
4239 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4241 if (generate)
4243 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4244 emit_constant_insn (cond,
4245 gen_rtx_SET (VOIDmode, sub,
4246 gen_rtx_NOT (mode, source)));
4247 source = sub;
4248 if (subtargets)
4249 sub = gen_reg_rtx (mode);
4250 emit_constant_insn (cond,
4251 gen_rtx_SET (VOIDmode, sub,
4252 gen_rtx_AND (mode, source,
4253 GEN_INT (temp1))));
4254 emit_constant_insn (cond,
4255 gen_rtx_SET (VOIDmode, target,
4256 gen_rtx_NOT (mode, sub)));
4258 return 3;
4260 break;
4262 case AND:
4263 /* See if two shifts will do 2 or more insn's worth of work. */
4264 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4266 HOST_WIDE_INT shift_mask = ((0xffffffff
4267 << (32 - clear_sign_bit_copies))
4268 & 0xffffffff);
4270 if ((remainder | shift_mask) != 0xffffffff)
4272 if (generate)
4274 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4275 insns = arm_gen_constant (AND, mode, cond,
4276 remainder | shift_mask,
4277 new_src, source, subtargets, 1);
4278 source = new_src;
4280 else
4282 rtx targ = subtargets ? NULL_RTX : target;
4283 insns = arm_gen_constant (AND, mode, cond,
4284 remainder | shift_mask,
4285 targ, source, subtargets, 0);
4289 if (generate)
4291 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4292 rtx shift = GEN_INT (clear_sign_bit_copies);
4294 emit_insn (gen_ashlsi3 (new_src, source, shift));
4295 emit_insn (gen_lshrsi3 (target, new_src, shift));
4298 return insns + 2;
4301 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4303 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4305 if ((remainder | shift_mask) != 0xffffffff)
4307 if (generate)
4309 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4311 insns = arm_gen_constant (AND, mode, cond,
4312 remainder | shift_mask,
4313 new_src, source, subtargets, 1);
4314 source = new_src;
4316 else
4318 rtx targ = subtargets ? NULL_RTX : target;
4320 insns = arm_gen_constant (AND, mode, cond,
4321 remainder | shift_mask,
4322 targ, source, subtargets, 0);
4326 if (generate)
4328 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4329 rtx shift = GEN_INT (clear_zero_bit_copies);
4331 emit_insn (gen_lshrsi3 (new_src, source, shift));
4332 emit_insn (gen_ashlsi3 (target, new_src, shift));
4335 return insns + 2;
4338 break;
4340 default:
4341 break;
4344 /* Calculate what the instruction sequences would be if we generated it
4345 normally, negated, or inverted. */
4346 if (code == AND)
4347 /* AND cannot be split into multiple insns, so invert and use BIC. */
4348 insns = 99;
4349 else
4350 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4352 if (can_negate)
4353 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4354 &neg_immediates);
4355 else
4356 neg_insns = 99;
4358 if (can_invert || final_invert)
4359 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4360 &inv_immediates);
4361 else
4362 inv_insns = 99;
4364 immediates = &pos_immediates;
4366 /* Is the negated immediate sequence more efficient? */
4367 if (neg_insns < insns && neg_insns <= inv_insns)
4369 insns = neg_insns;
4370 immediates = &neg_immediates;
4372 else
4373 can_negate = 0;
4375 /* Is the inverted immediate sequence more efficient?
4376 We must allow for an extra NOT instruction for XOR operations, although
4377 there is some chance that the final 'mvn' will get optimized later. */
4378 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4380 insns = inv_insns;
4381 immediates = &inv_immediates;
4383 else
4385 can_invert = 0;
4386 final_invert = 0;
4389 /* Now output the chosen sequence as instructions. */
4390 if (generate)
4392 for (i = 0; i < insns; i++)
4394 rtx new_src, temp1_rtx;
4396 temp1 = immediates->i[i];
4398 if (code == SET || code == MINUS)
4399 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4400 else if ((final_invert || i < (insns - 1)) && subtargets)
4401 new_src = gen_reg_rtx (mode);
4402 else
4403 new_src = target;
4405 if (can_invert)
4406 temp1 = ~temp1;
4407 else if (can_negate)
4408 temp1 = -temp1;
4410 temp1 = trunc_int_for_mode (temp1, mode);
4411 temp1_rtx = GEN_INT (temp1);
4413 if (code == SET)
4415 else if (code == MINUS)
4416 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4417 else
4418 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4420 emit_constant_insn (cond,
4421 gen_rtx_SET (VOIDmode, new_src,
4422 temp1_rtx));
4423 source = new_src;
4425 if (code == SET)
4427 can_negate = can_invert;
4428 can_invert = 0;
4429 code = PLUS;
4431 else if (code == MINUS)
4432 code = PLUS;
4436 if (final_invert)
4438 if (generate)
4439 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4440 gen_rtx_NOT (mode, source)));
4441 insns++;
4444 return insns;
4447 /* Canonicalize a comparison so that we are more likely to recognize it.
4448 This can be done for a few constant compares, where we can make the
4449 immediate value easier to load. */
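 /* Example (illustrative): "x <= 0xfff" cannot use 0xfff directly, since
    it is not a valid immediate, so the code below rewrites it as
    "x < 0x1000", whose constant is a single rotated 8-bit value.  */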
4451 static void
4452 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4453 bool op0_preserve_value)
4455 enum machine_mode mode;
4456 unsigned HOST_WIDE_INT i, maxval;
4458 mode = GET_MODE (*op0);
4459 if (mode == VOIDmode)
4460 mode = GET_MODE (*op1);
4462 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4464 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4465 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4466 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4467 for GTU/LEU in Thumb mode. */
4468 if (mode == DImode)
4470 rtx tem;
4472 if (*code == GT || *code == LE
4473 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4475 /* Missing comparison. First try to use an available
4476 comparison. */
4477 if (CONST_INT_P (*op1))
4479 i = INTVAL (*op1);
4480 switch (*code)
4482 case GT:
4483 case LE:
4484 if (i != maxval
4485 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4487 *op1 = GEN_INT (i + 1);
4488 *code = *code == GT ? GE : LT;
4489 return;
4491 break;
4492 case GTU:
4493 case LEU:
4494 if (i != ~((unsigned HOST_WIDE_INT) 0)
4495 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4497 *op1 = GEN_INT (i + 1);
4498 *code = *code == GTU ? GEU : LTU;
4499 return;
4501 break;
4502 default:
4503 gcc_unreachable ();
4507 /* If that did not work, reverse the condition. */
4508 if (!op0_preserve_value)
4510 tem = *op0;
4511 *op0 = *op1;
4512 *op1 = tem;
4513 *code = (int)swap_condition ((enum rtx_code)*code);
4516 return;
4519 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4520 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4521 to facilitate possible combining with a cmp into 'ands'. */
4522 if (mode == SImode
4523 && GET_CODE (*op0) == ZERO_EXTEND
4524 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4525 && GET_MODE (XEXP (*op0, 0)) == QImode
4526 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4527 && subreg_lowpart_p (XEXP (*op0, 0))
4528 && *op1 == const0_rtx)
4529 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4530 GEN_INT (255));
4532 /* Comparisons smaller than DImode. Only adjust comparisons against
4533 an out-of-range constant. */
4534 if (!CONST_INT_P (*op1)
4535 || const_ok_for_arm (INTVAL (*op1))
4536 || const_ok_for_arm (- INTVAL (*op1)))
4537 return;
4539 i = INTVAL (*op1);
4541 switch (*code)
4543 case EQ:
4544 case NE:
4545 return;
4547 case GT:
4548 case LE:
4549 if (i != maxval
4550 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4552 *op1 = GEN_INT (i + 1);
4553 *code = *code == GT ? GE : LT;
4554 return;
4556 break;
4558 case GE:
4559 case LT:
4560 if (i != ~maxval
4561 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4563 *op1 = GEN_INT (i - 1);
4564 *code = *code == GE ? GT : LE;
4565 return;
4567 break;
4569 case GTU:
4570 case LEU:
4571 if (i != ~((unsigned HOST_WIDE_INT) 0)
4572 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4574 *op1 = GEN_INT (i + 1);
4575 *code = *code == GTU ? GEU : LTU;
4576 return;
4578 break;
4580 case GEU:
4581 case LTU:
4582 if (i != 0
4583 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4585 *op1 = GEN_INT (i - 1);
4586 *code = *code == GEU ? GTU : LEU;
4587 return;
4589 break;
4591 default:
4592 gcc_unreachable ();
4597 /* Define how to find the value returned by a function. */
4599 static rtx
4600 arm_function_value(const_tree type, const_tree func,
4601 bool outgoing ATTRIBUTE_UNUSED)
4603 enum machine_mode mode;
4604 int unsignedp ATTRIBUTE_UNUSED;
4605 rtx r ATTRIBUTE_UNUSED;
4607 mode = TYPE_MODE (type);
4609 if (TARGET_AAPCS_BASED)
4610 return aapcs_allocate_return_reg (mode, type, func);
4612 /* Promote integer types. */
4613 if (INTEGRAL_TYPE_P (type))
4614 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4616 /* Promotes small structs returned in a register to full-word size
4617 for big-endian AAPCS. */
4618 if (arm_return_in_msb (type))
4620 HOST_WIDE_INT size = int_size_in_bytes (type);
4621 if (size % UNITS_PER_WORD != 0)
4623 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4624 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4628 return arm_libcall_value_1 (mode);
4631 /* libcall hashtable helpers. */
4633 struct libcall_hasher : typed_noop_remove <rtx_def>
4635 typedef rtx_def value_type;
4636 typedef rtx_def compare_type;
4637 static inline hashval_t hash (const value_type *);
4638 static inline bool equal (const value_type *, const compare_type *);
4639 static inline void remove (value_type *);
4642 inline bool
4643 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4645 return rtx_equal_p (p1, p2);
4648 inline hashval_t
4649 libcall_hasher::hash (const value_type *p1)
4651 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4654 typedef hash_table <libcall_hasher> libcall_table_type;
4656 static void
4657 add_libcall (libcall_table_type htab, rtx libcall)
4659 *htab.find_slot (libcall, INSERT) = libcall;
4662 static bool
4663 arm_libcall_uses_aapcs_base (const_rtx libcall)
4665 static bool init_done = false;
4666 static libcall_table_type libcall_htab;
4668 if (!init_done)
4670 init_done = true;
4672 libcall_htab.create (31);
4673 add_libcall (libcall_htab,
4674 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4675 add_libcall (libcall_htab,
4676 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4677 add_libcall (libcall_htab,
4678 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4679 add_libcall (libcall_htab,
4680 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4682 add_libcall (libcall_htab,
4683 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4684 add_libcall (libcall_htab,
4685 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4686 add_libcall (libcall_htab,
4687 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4688 add_libcall (libcall_htab,
4689 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4691 add_libcall (libcall_htab,
4692 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4693 add_libcall (libcall_htab,
4694 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4695 add_libcall (libcall_htab,
4696 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4697 add_libcall (libcall_htab,
4698 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4699 add_libcall (libcall_htab,
4700 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4701 add_libcall (libcall_htab,
4702 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4703 add_libcall (libcall_htab,
4704 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4705 add_libcall (libcall_htab,
4706 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4708 /* Values from double-precision helper functions are returned in core
4709 registers if the selected core only supports single-precision
4710 arithmetic, even if we are using the hard-float ABI. The same is
4711 true for single-precision helpers, but we will never be using the
4712 hard-float ABI on a CPU which doesn't support single-precision
4713 operations in hardware. */
4714 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4715 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4716 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4717 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4718 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4719 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4720 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4721 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4725 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4726 SFmode));
4727 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4728 DFmode));
4731 return libcall && libcall_htab.find (libcall) != NULL;
4734 static rtx
4735 arm_libcall_value_1 (enum machine_mode mode)
4737 if (TARGET_AAPCS_BASED)
4738 return aapcs_libcall_value (mode);
4739 else if (TARGET_IWMMXT_ABI
4740 && arm_vector_mode_supported_p (mode))
4741 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4742 else
4743 return gen_rtx_REG (mode, ARG_REGISTER (1));
4746 /* Define how to find the value returned by a library function
4747 assuming the value has mode MODE. */
4749 static rtx
4750 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4752 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4753 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4755 /* The following libcalls return their result in integer registers,
4756 even though they return a floating point value. */
4757 if (arm_libcall_uses_aapcs_base (libcall))
4758 return gen_rtx_REG (mode, ARG_REGISTER(1));
4762 return arm_libcall_value_1 (mode);
4765 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4767 static bool
4768 arm_function_value_regno_p (const unsigned int regno)
4770 if (regno == ARG_REGISTER (1)
4771 || (TARGET_32BIT
4772 && TARGET_AAPCS_BASED
4773 && TARGET_VFP
4774 && TARGET_HARD_FLOAT
4775 && regno == FIRST_VFP_REGNUM)
4776 || (TARGET_IWMMXT_ABI
4777 && regno == FIRST_IWMMXT_REGNUM))
4778 return true;
4780 return false;
4783 /* Determine the amount of memory needed to store the possible return
4784 registers of an untyped call. */
4786 arm_apply_result_size (void)
4788 int size = 16;
4790 if (TARGET_32BIT)
4792 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4793 size += 32;
4794 if (TARGET_IWMMXT_ABI)
4795 size += 8;
4798 return size;
4801 /* Decide whether TYPE should be returned in memory (true)
4802 or in a register (false). FNTYPE is the type of the function making
4803 the call. */
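 /* Illustrative examples under AAPCS (not an exhaustive statement of the
    rules): a struct of two ints (8 bytes) is larger than a word and has no
    co-processor candidate, so it is returned in memory; a struct holding a
    single double is a homogeneous floating-point aggregate and, under the
    VFP (hard-float) variant, is returned in d0 instead.  */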
4804 static bool
4805 arm_return_in_memory (const_tree type, const_tree fntype)
4807 HOST_WIDE_INT size;
4809 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4811 if (TARGET_AAPCS_BASED)
4813 /* Simple, non-aggregate types (ie not including vectors and
4814 complex) are always returned in a register (or registers).
4815 We don't care about which register here, so we can short-cut
4816 some of the detail. */
4817 if (!AGGREGATE_TYPE_P (type)
4818 && TREE_CODE (type) != VECTOR_TYPE
4819 && TREE_CODE (type) != COMPLEX_TYPE)
4820 return false;
4822 /* Any return value that is no larger than one word can be
4823 returned in r0. */
4824 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4825 return false;
4827 /* Check any available co-processors to see if they accept the
4828 type as a register candidate (VFP, for example, can return
4829 some aggregates in consecutive registers). These aren't
4830 available if the call is variadic. */
4831 if (aapcs_select_return_coproc (type, fntype) >= 0)
4832 return false;
4834 /* Vector values should be returned using ARM registers, not
4835 memory (unless they're over 16 bytes, which will break since
4836 we only have four call-clobbered registers to play with). */
4837 if (TREE_CODE (type) == VECTOR_TYPE)
4838 return (size < 0 || size > (4 * UNITS_PER_WORD));
4840 /* The rest go in memory. */
4841 return true;
4844 if (TREE_CODE (type) == VECTOR_TYPE)
4845 return (size < 0 || size > (4 * UNITS_PER_WORD));
4847 if (!AGGREGATE_TYPE_P (type) &&
4848 (TREE_CODE (type) != VECTOR_TYPE))
4849 /* All simple types are returned in registers. */
4850 return false;
4852 if (arm_abi != ARM_ABI_APCS)
4854 /* ATPCS and later return aggregate types in memory only if they are
4855 larger than a word (or are variable size). */
4856 return (size < 0 || size > UNITS_PER_WORD);
4859 /* For the arm-wince targets we choose to be compatible with Microsoft's
4860 ARM and Thumb compilers, which always return aggregates in memory. */
4861 #ifndef ARM_WINCE
4862 /* All structures/unions bigger than one word are returned in memory.
4863 Also catch the case where int_size_in_bytes returns -1. In this case
4864 the aggregate is either huge or of variable size, and in either case
4865 we will want to return it via memory and not in a register. */
4866 if (size < 0 || size > UNITS_PER_WORD)
4867 return true;
4869 if (TREE_CODE (type) == RECORD_TYPE)
4871 tree field;
4873 /* For a struct the APCS says that we only return in a register
4874 if the type is 'integer like' and every addressable element
4875 has an offset of zero. For practical purposes this means
4876 that the structure can have at most one non bit-field element
4877 and that this element must be the first one in the structure. */
4879 /* Find the first field, ignoring non FIELD_DECL things which will
4880 have been created by C++. */
4881 for (field = TYPE_FIELDS (type);
4882 field && TREE_CODE (field) != FIELD_DECL;
4883 field = DECL_CHAIN (field))
4884 continue;
4886 if (field == NULL)
4887 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4889 /* Check that the first field is valid for returning in a register. */
4891 /* ... Floats are not allowed */
4892 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4893 return true;
4895 /* ... Aggregates that are not themselves valid for returning in
4896 a register are not allowed. */
4897 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4898 return true;
4900 /* Now check the remaining fields, if any. Only bitfields are allowed,
4901 since they are not addressable. */
4902 for (field = DECL_CHAIN (field);
4903 field;
4904 field = DECL_CHAIN (field))
4906 if (TREE_CODE (field) != FIELD_DECL)
4907 continue;
4909 if (!DECL_BIT_FIELD_TYPE (field))
4910 return true;
4913 return false;
4916 if (TREE_CODE (type) == UNION_TYPE)
4918 tree field;
4920 /* Unions can be returned in registers if every element is
4921 integral, or can be returned in an integer register. */
4922 for (field = TYPE_FIELDS (type);
4923 field;
4924 field = DECL_CHAIN (field))
4926 if (TREE_CODE (field) != FIELD_DECL)
4927 continue;
4929 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4930 return true;
4932 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4933 return true;
4936 return false;
4938 #endif /* not ARM_WINCE */
4940 /* Return all other types in memory. */
4941 return true;
4944 const struct pcs_attribute_arg
4946 const char *arg;
4947 enum arm_pcs value;
4948 } pcs_attribute_args[] =
4950 {"aapcs", ARM_PCS_AAPCS},
4951 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4952 #if 0
4953 /* We could recognize these, but changes would be needed elsewhere
4954 * to implement them. */
4955 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4956 {"atpcs", ARM_PCS_ATPCS},
4957 {"apcs", ARM_PCS_APCS},
4958 #endif
4959 {NULL, ARM_PCS_UNKNOWN}
4962 static enum arm_pcs
4963 arm_pcs_from_attribute (tree attr)
4965 const struct pcs_attribute_arg *ptr;
4966 const char *arg;
4968 /* Get the value of the argument. */
4969 if (TREE_VALUE (attr) == NULL_TREE
4970 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4971 return ARM_PCS_UNKNOWN;
4973 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4975 /* Check it against the list of known arguments. */
4976 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4977 if (streq (arg, ptr->arg))
4978 return ptr->value;
4980 /* An unrecognized PCS variant. */
4981 return ARM_PCS_UNKNOWN;
4984 /* Get the PCS variant to use for this call. TYPE is the function's type
4985 specification, DECL is the specific declaration. DECL may be null if
4986 the call could be indirect or if this is a library call. */
4987 static enum arm_pcs
4988 arm_get_pcs_model (const_tree type, const_tree decl)
4990 bool user_convention = false;
4991 enum arm_pcs user_pcs = arm_pcs_default;
4992 tree attr;
4994 gcc_assert (type);
4996 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4997 if (attr)
4999 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5000 user_convention = true;
5003 if (TARGET_AAPCS_BASED)
5005 /* Detect varargs functions. These always use the base rules
5006 (no argument is ever a candidate for a co-processor
5007 register). */
5008 bool base_rules = stdarg_p (type);
5010 if (user_convention)
5012 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5013 sorry ("non-AAPCS derived PCS variant");
5014 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5015 error ("variadic functions must use the base AAPCS variant");
5018 if (base_rules)
5019 return ARM_PCS_AAPCS;
5020 else if (user_convention)
5021 return user_pcs;
5022 else if (decl && flag_unit_at_a_time)
5024 /* Local functions never leak outside this compilation unit,
5025 so we are free to use whatever conventions are
5026 appropriate. */
5027 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5028 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5029 if (i && i->local)
5030 return ARM_PCS_AAPCS_LOCAL;
5033 else if (user_convention && user_pcs != arm_pcs_default)
5034 sorry ("PCS variant");
5036 /* For everything else we use the target's default. */
5037 return arm_pcs_default;
5041 static void
5042 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5043 const_tree fntype ATTRIBUTE_UNUSED,
5044 rtx libcall ATTRIBUTE_UNUSED,
5045 const_tree fndecl ATTRIBUTE_UNUSED)
5047 /* Record the unallocated VFP registers. */
5048 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5049 pcum->aapcs_vfp_reg_alloc = 0;
5052 /* Walk down the type tree of TYPE counting consecutive base elements.
5053 If *MODEP is VOIDmode, then set it to the first valid floating point
5054 type. If a non-floating point type is found, or if a floating point
5055 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5056 otherwise return the count in the sub-tree. */
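 /* Illustrative examples: for "struct { double x, y; }" this returns 2
    with *MODEP set to DFmode (a homogeneous aggregate of two doubles);
    for "struct { float f; double d; }" it returns -1 because the element
    modes differ; for "float[4]" it returns 4 with *MODEP set to SFmode.  */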
5057 static int
5058 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5060 enum machine_mode mode;
5061 HOST_WIDE_INT size;
5063 switch (TREE_CODE (type))
5065 case REAL_TYPE:
5066 mode = TYPE_MODE (type);
5067 if (mode != DFmode && mode != SFmode)
5068 return -1;
5070 if (*modep == VOIDmode)
5071 *modep = mode;
5073 if (*modep == mode)
5074 return 1;
5076 break;
5078 case COMPLEX_TYPE:
5079 mode = TYPE_MODE (TREE_TYPE (type));
5080 if (mode != DFmode && mode != SFmode)
5081 return -1;
5083 if (*modep == VOIDmode)
5084 *modep = mode;
5086 if (*modep == mode)
5087 return 2;
5089 break;
5091 case VECTOR_TYPE:
5092 /* Use V2SImode and V4SImode as representatives of all 64-bit
5093 and 128-bit vector types, whether or not those modes are
5094 supported with the present options. */
5095 size = int_size_in_bytes (type);
5096 switch (size)
5098 case 8:
5099 mode = V2SImode;
5100 break;
5101 case 16:
5102 mode = V4SImode;
5103 break;
5104 default:
5105 return -1;
5108 if (*modep == VOIDmode)
5109 *modep = mode;
5111 /* Vector modes are considered to be opaque: two vectors are
5112 equivalent for the purposes of being homogeneous aggregates
5113 if they are the same size. */
5114 if (*modep == mode)
5115 return 1;
5117 break;
5119 case ARRAY_TYPE:
5121 int count;
5122 tree index = TYPE_DOMAIN (type);
5124 /* Can't handle incomplete types. */
5125 if (!COMPLETE_TYPE_P (type))
5126 return -1;
5128 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5129 if (count == -1
5130 || !index
5131 || !TYPE_MAX_VALUE (index)
5132 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5133 || !TYPE_MIN_VALUE (index)
5134 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5135 || count < 0)
5136 return -1;
5138 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5139 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5141 /* There must be no padding. */
5142 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5143 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5144 != count * GET_MODE_BITSIZE (*modep)))
5145 return -1;
5147 return count;
5150 case RECORD_TYPE:
5152 int count = 0;
5153 int sub_count;
5154 tree field;
5156 /* Can't handle incomplete types. */
5157 if (!COMPLETE_TYPE_P (type))
5158 return -1;
5160 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5162 if (TREE_CODE (field) != FIELD_DECL)
5163 continue;
5165 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5166 if (sub_count < 0)
5167 return -1;
5168 count += sub_count;
5171 /* There must be no padding. */
5172 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5173 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5174 != count * GET_MODE_BITSIZE (*modep)))
5175 return -1;
5177 return count;
5180 case UNION_TYPE:
5181 case QUAL_UNION_TYPE:
5183 /* These aren't very interesting except in a degenerate case. */
5184 int count = 0;
5185 int sub_count;
5186 tree field;
5188 /* Can't handle incomplete types. */
5189 if (!COMPLETE_TYPE_P (type))
5190 return -1;
5192 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5194 if (TREE_CODE (field) != FIELD_DECL)
5195 continue;
5197 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5198 if (sub_count < 0)
5199 return -1;
5200 count = count > sub_count ? count : sub_count;
5203 /* There must be no padding. */
5204 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5205 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5206 != count * GET_MODE_BITSIZE (*modep)))
5207 return -1;
5209 return count;
5212 default:
5213 break;
5216 return -1;
5219 /* Return true if PCS_VARIANT should use VFP registers. */
5220 static bool
5221 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5223 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5225 static bool seen_thumb1_vfp = false;
5227 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5229 sorry ("Thumb-1 hard-float VFP ABI");
5230 /* sorry() is not immediately fatal, so only display this once. */
5231 seen_thumb1_vfp = true;
5234 return true;
5237 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5238 return false;
5240 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5241 (TARGET_VFP_DOUBLE || !is_double));
5244 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5245 suitable for passing or returning in VFP registers for the PCS
5246 variant selected. If it is, then *BASE_MODE is updated to contain
5247 a machine mode describing each element of the argument's type and
5248 *COUNT to hold the number of such elements. */
5249 static bool
5250 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5251 enum machine_mode mode, const_tree type,
5252 enum machine_mode *base_mode, int *count)
5254 enum machine_mode new_mode = VOIDmode;
5256 /* If we have the type information, prefer that to working things
5257 out from the mode. */
5258 if (type)
5260 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5262 if (ag_count > 0 && ag_count <= 4)
5263 *count = ag_count;
5264 else
5265 return false;
5267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5268 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5269 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5271 *count = 1;
5272 new_mode = mode;
5274 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5276 *count = 2;
5277 new_mode = (mode == DCmode ? DFmode : SFmode);
5279 else
5280 return false;
5283 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5284 return false;
5286 *base_mode = new_mode;
5287 return true;
5290 static bool
5291 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5292 enum machine_mode mode, const_tree type)
5294 int count ATTRIBUTE_UNUSED;
5295 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5297 if (!use_vfp_abi (pcs_variant, false))
5298 return false;
5299 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5300 &ag_mode, &count);
5303 static bool
5304 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5305 const_tree type)
5307 if (!use_vfp_abi (pcum->pcs_variant, false))
5308 return false;
5310 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5311 &pcum->aapcs_vfp_rmode,
5312 &pcum->aapcs_vfp_rcount);
5315 static bool
5316 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5317 const_tree type ATTRIBUTE_UNUSED)
5319 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5320 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5321 int regno;
5323 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5324 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5326 pcum->aapcs_vfp_reg_alloc = mask << regno;
5327 if (mode == BLKmode
5328 || (mode == TImode && ! TARGET_NEON)
5329 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5331 int i;
5332 int rcount = pcum->aapcs_vfp_rcount;
5333 int rshift = shift;
5334 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5335 rtx par;
5336 if (!TARGET_NEON)
5338 /* Avoid using unsupported vector modes. */
5339 if (rmode == V2SImode)
5340 rmode = DImode;
5341 else if (rmode == V4SImode)
5343 rmode = DImode;
5344 rcount *= 2;
5345 rshift /= 2;
5348 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5349 for (i = 0; i < rcount; i++)
5351 rtx tmp = gen_rtx_REG (rmode,
5352 FIRST_VFP_REGNUM + regno + i * rshift);
5353 tmp = gen_rtx_EXPR_LIST
5354 (VOIDmode, tmp,
5355 GEN_INT (i * GET_MODE_SIZE (rmode)));
5356 XVECEXP (par, 0, i) = tmp;
5359 pcum->aapcs_reg = par;
5361 else
5362 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5363 return true;
5365 return false;
5368 static rtx
5369 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5370 enum machine_mode mode,
5371 const_tree type ATTRIBUTE_UNUSED)
5373 if (!use_vfp_abi (pcs_variant, false))
5374 return NULL;
5376 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5378 int count;
5379 enum machine_mode ag_mode;
5380 int i;
5381 rtx par;
5382 int shift;
5384 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5385 &ag_mode, &count);
5387 if (!TARGET_NEON)
5389 if (ag_mode == V2SImode)
5390 ag_mode = DImode;
5391 else if (ag_mode == V4SImode)
5393 ag_mode = DImode;
5394 count *= 2;
5397 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5398 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5399 for (i = 0; i < count; i++)
5401 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5402 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5403 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5404 XVECEXP (par, 0, i) = tmp;
5407 return par;
5410 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5413 static void
5414 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5415 enum machine_mode mode ATTRIBUTE_UNUSED,
5416 const_tree type ATTRIBUTE_UNUSED)
5418 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5419 pcum->aapcs_vfp_reg_alloc = 0;
5420 return;
5423 #define AAPCS_CP(X) \
5425 aapcs_ ## X ## _cum_init, \
5426 aapcs_ ## X ## _is_call_candidate, \
5427 aapcs_ ## X ## _allocate, \
5428 aapcs_ ## X ## _is_return_candidate, \
5429 aapcs_ ## X ## _allocate_return_reg, \
5430 aapcs_ ## X ## _advance \
5433 /* Table of co-processors that can be used to pass arguments in
5434 registers. Ideally no argument should be a candidate for more than
5435 one co-processor table entry, but the table is processed in order
5436 and stops after the first match. If that entry then fails to put
5437 the argument into a co-processor register, the argument will go on
5438 the stack. */
5439 static struct
5441 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5442 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5444 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5445 BLKmode) is a candidate for this co-processor's registers; this
5446 function should ignore any position-dependent state in
5447 CUMULATIVE_ARGS and only use call-type dependent information. */
5448 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5450 /* Return true if the argument does get a co-processor register; it
5451 should set aapcs_reg to an RTX of the register allocated as is
5452 required for a return from FUNCTION_ARG. */
5453 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5455 /* Return true if a result of mode MODE (or type TYPE if MODE is
5456 BLKmode) can be returned in this co-processor's registers. */
5457 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5459 /* Allocate and return an RTX element to hold the return type of a
5460 call; this routine must not fail and will only be called if
5461 is_return_candidate returned true with the same parameters. */
5462 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5464 /* Finish processing this argument and prepare to start processing
5465 the next one. */
5466 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5467 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5469 AAPCS_CP(vfp)
5472 #undef AAPCS_CP
5474 static int
5475 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5476 const_tree type)
5478 int i;
5480 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5481 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5482 return i;
5484 return -1;
5487 static int
5488 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5490 /* We aren't passed a decl, so we can't check that a call is local.
5491 However, it isn't clear that that would be a win anyway, since it
5492 might limit some tail-calling opportunities. */
5493 enum arm_pcs pcs_variant;
5495 if (fntype)
5497 const_tree fndecl = NULL_TREE;
5499 if (TREE_CODE (fntype) == FUNCTION_DECL)
5501 fndecl = fntype;
5502 fntype = TREE_TYPE (fntype);
5505 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5507 else
5508 pcs_variant = arm_pcs_default;
5510 if (pcs_variant != ARM_PCS_AAPCS)
5512 int i;
5514 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5515 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5516 TYPE_MODE (type),
5517 type))
5518 return i;
5520 return -1;
5523 static rtx
5524 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5525 const_tree fntype)
5527 /* We aren't passed a decl, so we can't check that a call is local.
5528 However, it isn't clear that that would be a win anyway, since it
5529 might limit some tail-calling opportunities. */
5530 enum arm_pcs pcs_variant;
5531 int unsignedp ATTRIBUTE_UNUSED;
5533 if (fntype)
5535 const_tree fndecl = NULL_TREE;
5537 if (TREE_CODE (fntype) == FUNCTION_DECL)
5539 fndecl = fntype;
5540 fntype = TREE_TYPE (fntype);
5543 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5545 else
5546 pcs_variant = arm_pcs_default;
5548 /* Promote integer types. */
5549 if (type && INTEGRAL_TYPE_P (type))
5550 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5552 if (pcs_variant != ARM_PCS_AAPCS)
5554 int i;
5556 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5557 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5558 type))
5559 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5560 mode, type);
5563 /* Promotes small structs returned in a register to full-word size
5564 for big-endian AAPCS. */
5565 if (type && arm_return_in_msb (type))
5567 HOST_WIDE_INT size = int_size_in_bytes (type);
5568 if (size % UNITS_PER_WORD != 0)
5570 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5571 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5575 return gen_rtx_REG (mode, R0_REGNUM);
5578 static rtx
5579 aapcs_libcall_value (enum machine_mode mode)
5581 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5582 && GET_MODE_SIZE (mode) <= 4)
5583 mode = SImode;
5585 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5588 /* Lay out a function argument using the AAPCS rules. The rule
5589 numbers referred to here are those in the AAPCS. */
5590 static void
5591 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5592 const_tree type, bool named)
5594 int nregs, nregs2;
5595 int ncrn;
5597 /* We only need to do this once per argument. */
5598 if (pcum->aapcs_arg_processed)
5599 return;
5601 pcum->aapcs_arg_processed = true;
5603 /* Special case: if named is false then we are handling an incoming
5604 anonymous argument which is on the stack. */
5605 if (!named)
5606 return;
5608 /* Is this a potential co-processor register candidate? */
5609 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5611 int slot = aapcs_select_call_coproc (pcum, mode, type);
5612 pcum->aapcs_cprc_slot = slot;
5614 /* We don't have to apply any of the rules from part B of the
5615 preparation phase, these are handled elsewhere in the
5616 compiler. */
5618 if (slot >= 0)
5620 /* A Co-processor register candidate goes either in its own
5621 class of registers or on the stack. */
5622 if (!pcum->aapcs_cprc_failed[slot])
5624 /* C1.cp - Try to allocate the argument to co-processor
5625 registers. */
5626 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5627 return;
5629 /* C2.cp - Put the argument on the stack and note that we
5630 can't assign any more candidates in this slot. We also
5631 need to note that we have allocated stack space, so that
5632 we won't later try to split a non-cprc candidate between
5633 core registers and the stack. */
5634 pcum->aapcs_cprc_failed[slot] = true;
5635 pcum->can_split = false;
5638 /* We didn't get a register, so this argument goes on the
5639 stack. */
5640 gcc_assert (pcum->can_split == false);
5641 return;
5645 /* C3 - For double-word aligned arguments, round the NCRN up to the
5646 next even number. */
5647 ncrn = pcum->aapcs_ncrn;
5648 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5649 ncrn++;
5651 nregs = ARM_NUM_REGS2(mode, type);
5653 /* Sigh, this test should really assert that nregs > 0, but a GCC
5654 extension allows empty structs and then gives them zero size; it
5655 then allows such a structure to be passed by value. For some of
5656 the code below we have to pretend that such an argument has
5657 non-zero size so that we 'locate' it correctly either in
5658 registers or on the stack. */
5659 gcc_assert (nregs >= 0);
5661 nregs2 = nregs ? nregs : 1;
5663 /* C4 - Argument fits entirely in core registers. */
5664 if (ncrn + nregs2 <= NUM_ARG_REGS)
5666 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5667 pcum->aapcs_next_ncrn = ncrn + nregs;
5668 return;
5671 /* C5 - Some core registers left and there are no arguments already
5672 on the stack: split this argument between the remaining core
5673 registers and the stack. */
5674 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5676 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5677 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5678 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5679 return;
5682 /* C6 - NCRN is set to 4. */
5683 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5685 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5686 return;
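/* A worked example of rules C3-C6 above (an illustrative sketch only;
   it assumes the base AAPCS variant with core argument registers r0-r3
   and no co-processor candidates):

       void f (int a, double b, int c);

   'a' occupies r0.  'b' needs doubleword alignment, so rule C3 rounds
   the NCRN up from 1 to 2 and 'b' occupies r2+r3 (C4).  'c' then no
   longer fits in core registers, so C6 sets the NCRN to 4 and 'c' goes
   on the stack (C7/C8).  */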
5689 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5690 for a call to a function whose data type is FNTYPE.
5691 For a library call, FNTYPE is NULL. */
5692 void
5693 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5694 rtx libname,
5695 tree fndecl ATTRIBUTE_UNUSED)
5697 /* Long call handling. */
5698 if (fntype)
5699 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5700 else
5701 pcum->pcs_variant = arm_pcs_default;
5703 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5705 if (arm_libcall_uses_aapcs_base (libname))
5706 pcum->pcs_variant = ARM_PCS_AAPCS;
5708 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5709 pcum->aapcs_reg = NULL_RTX;
5710 pcum->aapcs_partial = 0;
5711 pcum->aapcs_arg_processed = false;
5712 pcum->aapcs_cprc_slot = -1;
5713 pcum->can_split = true;
5715 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5717 int i;
5719 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5721 pcum->aapcs_cprc_failed[i] = false;
5722 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5725 return;
5728 /* Legacy ABIs */
5730 /* On the ARM, the offset starts at 0. */
5731 pcum->nregs = 0;
5732 pcum->iwmmxt_nregs = 0;
5733 pcum->can_split = true;
5735 /* Varargs vectors are treated the same as long long.
5736 named_count avoids having to change the way arm handles 'named'. */
5737 pcum->named_count = 0;
5738 pcum->nargs = 0;
5740 if (TARGET_REALLY_IWMMXT && fntype)
5742 tree fn_arg;
5744 for (fn_arg = TYPE_ARG_TYPES (fntype);
5745 fn_arg;
5746 fn_arg = TREE_CHAIN (fn_arg))
5747 pcum->named_count += 1;
5749 if (! pcum->named_count)
5750 pcum->named_count = INT_MAX;
5754 /* Return true if we use LRA instead of reload pass. */
5755 static bool
5756 arm_lra_p (void)
5758 return arm_lra_flag;
5761 /* Return true if mode/type need doubleword alignment. */
5762 static bool
5763 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5765 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5766 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
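/* For instance (an illustrative sketch, assuming an AAPCS-based target
   where PARM_BOUNDARY is 32 bits): 'long long' and 'double' arguments
   have 64-bit alignment, as does a type declared with
   __attribute__ ((aligned (8))), so all of these need doubleword
   alignment, while a plain 'int' does not.  */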
5770 /* Determine where to put an argument to a function.
5771 Value is zero to push the argument on the stack,
5772 or a hard register in which to store the argument.
5774 MODE is the argument's machine mode.
5775 TYPE is the data type of the argument (as a tree).
5776 This is null for libcalls where that information may
5777 not be available.
5778 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5779 the preceding args and about the function being called.
5780 NAMED is nonzero if this argument is a named parameter
5781 (otherwise it is an extra parameter matching an ellipsis).
5783 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5784 other arguments are passed on the stack. If (NAMED == 0) (which happens
5785 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5786 defined), say it is passed on the stack (function_prologue will
5787 indeed make it pass on the stack if necessary). */
5789 static rtx
5790 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5791 const_tree type, bool named)
5793 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5794 int nregs;
5796 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5797 a call insn (op3 of a call_value insn). */
5798 if (mode == VOIDmode)
5799 return const0_rtx;
5801 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5803 aapcs_layout_arg (pcum, mode, type, named);
5804 return pcum->aapcs_reg;
5807 /* Varargs vectors are treated the same as long long.
5808 named_count avoids having to change the way arm handles 'named'. */
5809 if (TARGET_IWMMXT_ABI
5810 && arm_vector_mode_supported_p (mode)
5811 && pcum->named_count > pcum->nargs + 1)
5813 if (pcum->iwmmxt_nregs <= 9)
5814 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5815 else
5817 pcum->can_split = false;
5818 return NULL_RTX;
5822 /* Put doubleword aligned quantities in even register pairs. */
5823 if (pcum->nregs & 1
5824 && ARM_DOUBLEWORD_ALIGN
5825 && arm_needs_doubleword_align (mode, type))
5826 pcum->nregs++;
5828 /* Only allow splitting an arg between regs and memory if all preceding
5829 args were allocated to regs. For args passed by reference we only count
5830 the reference pointer. */
5831 if (pcum->can_split)
5832 nregs = 1;
5833 else
5834 nregs = ARM_NUM_REGS2 (mode, type);
5836 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5837 return NULL_RTX;
5839 return gen_rtx_REG (mode, pcum->nregs);
5842 static unsigned int
5843 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5845 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5846 ? DOUBLEWORD_ALIGNMENT
5847 : PARM_BOUNDARY);
5850 static int
5851 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5852 tree type, bool named)
5854 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5855 int nregs = pcum->nregs;
5857 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5859 aapcs_layout_arg (pcum, mode, type, named);
5860 return pcum->aapcs_partial;
5863 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5864 return 0;
5866 if (NUM_ARG_REGS > nregs
5867 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5868 && pcum->can_split)
5869 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5871 return 0;
5874 /* Update the data in PCUM to advance over an argument
5875 of mode MODE and data type TYPE.
5876 (TYPE is null for libcalls where that information may not be available.) */
5878 static void
5879 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5880 const_tree type, bool named)
5882 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5884 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5886 aapcs_layout_arg (pcum, mode, type, named);
5888 if (pcum->aapcs_cprc_slot >= 0)
5890 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5891 type);
5892 pcum->aapcs_cprc_slot = -1;
5895 /* Generic stuff. */
5896 pcum->aapcs_arg_processed = false;
5897 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5898 pcum->aapcs_reg = NULL_RTX;
5899 pcum->aapcs_partial = 0;
5901 else
5903 pcum->nargs += 1;
5904 if (arm_vector_mode_supported_p (mode)
5905 && pcum->named_count > pcum->nargs
5906 && TARGET_IWMMXT_ABI)
5907 pcum->iwmmxt_nregs += 1;
5908 else
5909 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5913 /* Variable sized types are passed by reference. This is a GCC
5914 extension to the ARM ABI. */
5916 static bool
5917 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5918 enum machine_mode mode ATTRIBUTE_UNUSED,
5919 const_tree type, bool named ATTRIBUTE_UNUSED)
5921 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5924 /* Encode the current state of the #pragma [no_]long_calls. */
5925 typedef enum
5927 OFF, /* No #pragma [no_]long_calls is in effect. */
5928 LONG, /* #pragma long_calls is in effect. */
5929 SHORT /* #pragma no_long_calls is in effect. */
5930 } arm_pragma_enum;
5932 static arm_pragma_enum arm_pragma_long_calls = OFF;
5934 void
5935 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5937 arm_pragma_long_calls = LONG;
5940 void
5941 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5943 arm_pragma_long_calls = SHORT;
5946 void
5947 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5949 arm_pragma_long_calls = OFF;
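/* Example of the pragmas handled above (illustrative; the behaviour
   described follows arm_set_default_type_attributes below):

       #pragma long_calls
       void far_func (void);      gets an implicit long_call attribute
       #pragma no_long_calls
       void near_func (void);     gets an implicit short_call attribute
       #pragma long_calls_off
       void other_func (void);    back to the command-line default     */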
5952 /* Handle an attribute requiring a FUNCTION_DECL;
5953 arguments as in struct attribute_spec.handler. */
5954 static tree
5955 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5956 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5958 if (TREE_CODE (*node) != FUNCTION_DECL)
5960 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5961 name);
5962 *no_add_attrs = true;
5965 return NULL_TREE;
5968 /* Handle an "interrupt" or "isr" attribute;
5969 arguments as in struct attribute_spec.handler. */
5970 static tree
5971 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5972 bool *no_add_attrs)
5974 if (DECL_P (*node))
5976 if (TREE_CODE (*node) != FUNCTION_DECL)
5978 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5979 name);
5980 *no_add_attrs = true;
5982 /* FIXME: the argument, if any, is checked for type attributes;
5983 should it be checked for decl ones? */
5985 else
5987 if (TREE_CODE (*node) == FUNCTION_TYPE
5988 || TREE_CODE (*node) == METHOD_TYPE)
5990 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5992 warning (OPT_Wattributes, "%qE attribute ignored",
5993 name);
5994 *no_add_attrs = true;
5997 else if (TREE_CODE (*node) == POINTER_TYPE
5998 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5999 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6000 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6002 *node = build_variant_type_copy (*node);
6003 TREE_TYPE (*node) = build_type_attribute_variant
6004 (TREE_TYPE (*node),
6005 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6006 *no_add_attrs = true;
6008 else
6010 /* Possibly pass this attribute on from the type to a decl. */
6011 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6012 | (int) ATTR_FLAG_FUNCTION_NEXT
6013 | (int) ATTR_FLAG_ARRAY_NEXT))
6015 *no_add_attrs = true;
6016 return tree_cons (name, args, NULL_TREE);
6018 else
6020 warning (OPT_Wattributes, "%qE attribute ignored",
6021 name);
6026 return NULL_TREE;
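/* Example use of the attribute handled above (illustrative; the
   accepted argument strings are those recognized by arm_isr_value,
   e.g. "IRQ" and "FIQ"):

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
       void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   An unrecognized argument makes arm_isr_value return ARM_FT_UNKNOWN,
   so the attribute is ignored with a warning.  */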
6029 /* Handle a "pcs" attribute; arguments as in struct
6030 attribute_spec.handler. */
6031 static tree
6032 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6035 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6037 warning (OPT_Wattributes, "%qE attribute ignored", name);
6038 *no_add_attrs = true;
6040 return NULL_TREE;
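/* Example use of the "pcs" attribute handled above (illustrative; the
   strings accepted by arm_pcs_from_attribute include "aapcs" and
   "aapcs-vfp"):

       double f (double) __attribute__ ((pcs ("aapcs")));
       double g (double) __attribute__ ((pcs ("aapcs-vfp")));

   Any other string yields ARM_PCS_UNKNOWN and the attribute is ignored
   with a warning.  */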
6043 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6044 /* Handle the "notshared" attribute. This attribute is another way of
6045 requesting hidden visibility. ARM's compiler supports
6046 "__declspec(notshared)"; we support the same thing via an
6047 attribute. */
6049 static tree
6050 arm_handle_notshared_attribute (tree *node,
6051 tree name ATTRIBUTE_UNUSED,
6052 tree args ATTRIBUTE_UNUSED,
6053 int flags ATTRIBUTE_UNUSED,
6054 bool *no_add_attrs)
6056 tree decl = TYPE_NAME (*node);
6058 if (decl)
6060 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6061 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6062 *no_add_attrs = false;
6064 return NULL_TREE;
6066 #endif
6068 /* Return 0 if the attributes for two types are incompatible, 1 if they
6069 are compatible, and 2 if they are nearly compatible (which causes a
6070 warning to be generated). */
6071 static int
6072 arm_comp_type_attributes (const_tree type1, const_tree type2)
6074 int l1, l2, s1, s2;
6076 /* Check for mismatch of non-default calling convention. */
6077 if (TREE_CODE (type1) != FUNCTION_TYPE)
6078 return 1;
6080 /* Check for mismatched call attributes. */
6081 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6082 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6083 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6084 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6086 /* Only bother to check if an attribute is defined. */
6087 if (l1 | l2 | s1 | s2)
6089 /* If one type has an attribute, the other must have the same attribute. */
6090 if ((l1 != l2) || (s1 != s2))
6091 return 0;
6093 /* Disallow mixed attributes. */
6094 if ((l1 & s2) || (l2 & s1))
6095 return 0;
6098 /* Check for mismatched ISR attribute. */
6099 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6100 if (! l1)
6101 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6102 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6103 if (! l2)
6104 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6105 if (l1 != l2)
6106 return 0;
6108 return 1;
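/* As an illustration of the checks above (a sketch only): a function
   type carrying long_call and one carrying short_call compare as
   incompatible (return 0), for example when a pointer to a short_call
   function type is assigned the address of

       extern void f (void) __attribute__ ((long_call));

   whereas two function types with no call or ISR attributes at all
   compare as compatible (return 1).  */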
6111 /* Assigns default attributes to newly defined type. This is used to
6112 set short_call/long_call attributes for function types of
6113 functions defined inside corresponding #pragma scopes. */
6114 static void
6115 arm_set_default_type_attributes (tree type)
6117 /* Add __attribute__ ((long_call)) to all functions, when
6118 inside #pragma long_calls or __attribute__ ((short_call)),
6119 when inside #pragma no_long_calls. */
6120 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6122 tree type_attr_list, attr_name;
6123 type_attr_list = TYPE_ATTRIBUTES (type);
6125 if (arm_pragma_long_calls == LONG)
6126 attr_name = get_identifier ("long_call");
6127 else if (arm_pragma_long_calls == SHORT)
6128 attr_name = get_identifier ("short_call");
6129 else
6130 return;
6132 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6133 TYPE_ATTRIBUTES (type) = type_attr_list;
6137 /* Return true if DECL is known to be linked into section SECTION. */
6139 static bool
6140 arm_function_in_section_p (tree decl, section *section)
6142 /* We can only be certain about functions defined in the same
6143 compilation unit. */
6144 if (!TREE_STATIC (decl))
6145 return false;
6147 /* Make sure that SYMBOL always binds to the definition in this
6148 compilation unit. */
6149 if (!targetm.binds_local_p (decl))
6150 return false;
6152 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6153 if (!DECL_SECTION_NAME (decl))
6155 /* Make sure that we will not create a unique section for DECL. */
6156 if (flag_function_sections || DECL_ONE_ONLY (decl))
6157 return false;
6160 return function_section (decl) == section;
6163 /* Return nonzero if a 32-bit "long_call" should be generated for
6164 a call from the current function to DECL. We generate a long_call
6165 if the function:
6167 a. has an __attribute__ ((long_call))
6168 or b. is within the scope of a #pragma long_calls
6169 or c. the -mlong-calls command line switch has been specified
6171 However we do not generate a long call if the function:
6173 d. has an __attribute__ ((short_call))
6174 or e. is inside the scope of a #pragma no_long_calls
6175 or f. is defined in the same section as the current function. */
6177 bool
6178 arm_is_long_call_p (tree decl)
6180 tree attrs;
6182 if (!decl)
6183 return TARGET_LONG_CALLS;
6185 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6186 if (lookup_attribute ("short_call", attrs))
6187 return false;
6189 /* For "f", be conservative, and only cater for cases in which the
6190 whole of the current function is placed in the same section. */
6191 if (!flag_reorder_blocks_and_partition
6192 && TREE_CODE (decl) == FUNCTION_DECL
6193 && arm_function_in_section_p (decl, current_function_section ()))
6194 return false;
6196 if (lookup_attribute ("long_call", attrs))
6197 return true;
6199 return TARGET_LONG_CALLS;
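/* Putting the rules above together (illustrative):

       extern void far_fn (void) __attribute__ ((long_call));    always a long call
       extern void near_fn (void) __attribute__ ((short_call));  never a long call
       extern void plain_fn (void);   a long call only with -mlong-calls or inside
                                      #pragma long_calls, and not when it is known
                                      to be in the same section as the caller      */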
6202 /* Return nonzero if it is ok to make a tail-call to DECL. */
6203 static bool
6204 arm_function_ok_for_sibcall (tree decl, tree exp)
6206 unsigned long func_type;
6208 if (cfun->machine->sibcall_blocked)
6209 return false;
6211 /* Never tailcall something if we are generating code for Thumb-1. */
6212 if (TARGET_THUMB1)
6213 return false;
6215 /* The PIC register is live on entry to VxWorks PLT entries, so we
6216 must make the call before restoring the PIC register. */
6217 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6218 return false;
6220 /* Cannot tail-call to long calls, since these are out of range of
6221 a branch instruction. */
6222 if (decl && arm_is_long_call_p (decl))
6223 return false;
6225 /* If we are interworking and the function is not declared static
6226 then we can't tail-call it unless we know that it exists in this
6227 compilation unit (since it might be a Thumb routine). */
6228 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6229 && !TREE_ASM_WRITTEN (decl))
6230 return false;
6232 func_type = arm_current_func_type ();
6233 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6234 if (IS_INTERRUPT (func_type))
6235 return false;
6237 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6239 /* Check that the return value locations are the same. For
6240 example that we aren't returning a value from the sibling in
6241 a VFP register but then need to transfer it to a core
6242 register. */
6243 rtx a, b;
6245 a = arm_function_value (TREE_TYPE (exp), decl, false);
6246 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6247 cfun->decl, false);
6248 if (!rtx_equal_p (a, b))
6249 return false;
6252 /* Never tailcall if function may be called with a misaligned SP. */
6253 if (IS_STACKALIGN (func_type))
6254 return false;
6256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6257 references should become a NOP. Don't convert such calls into
6258 sibling calls. */
6259 if (TARGET_AAPCS_BASED
6260 && arm_abi == ARM_ABI_AAPCS
6261 && decl
6262 && DECL_WEAK (decl))
6263 return false;
6265 /* Everything else is ok. */
6266 return true;
6270 /* Addressing mode support functions. */
6272 /* Return nonzero if X is a legitimate immediate operand when compiling
6273 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6275 legitimate_pic_operand_p (rtx x)
6277 if (GET_CODE (x) == SYMBOL_REF
6278 || (GET_CODE (x) == CONST
6279 && GET_CODE (XEXP (x, 0)) == PLUS
6280 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6281 return 0;
6283 return 1;
6286 /* Record that the current function needs a PIC register. Initialize
6287 cfun->machine->pic_reg if we have not already done so. */
6289 static void
6290 require_pic_register (void)
6292 /* A lot of the logic here is made obscure by the fact that this
6293 routine gets called as part of the rtx cost estimation process.
6294 We don't want those calls to affect any assumptions about the real
6295 function; and further, we can't call entry_of_function() until we
6296 start the real expansion process. */
6297 if (!crtl->uses_pic_offset_table)
6299 gcc_assert (can_create_pseudo_p ());
6300 if (arm_pic_register != INVALID_REGNUM
6301 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6303 if (!cfun->machine->pic_reg)
6304 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6306 /* Play games to avoid marking the function as needing pic
6307 if we are being called as part of the cost-estimation
6308 process. */
6309 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6310 crtl->uses_pic_offset_table = 1;
6312 else
6314 rtx seq, insn;
6316 if (!cfun->machine->pic_reg)
6317 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6319 /* Play games to avoid marking the function as needing pic
6320 if we are being called as part of the cost-estimation
6321 process. */
6322 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6324 crtl->uses_pic_offset_table = 1;
6325 start_sequence ();
6327 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6328 && arm_pic_register > LAST_LO_REGNUM)
6329 emit_move_insn (cfun->machine->pic_reg,
6330 gen_rtx_REG (Pmode, arm_pic_register));
6331 else
6332 arm_load_pic_register (0UL);
6334 seq = get_insns ();
6335 end_sequence ();
6337 for (insn = seq; insn; insn = NEXT_INSN (insn))
6338 if (INSN_P (insn))
6339 INSN_LOCATION (insn) = prologue_location;
6341 /* We can be called during expansion of PHI nodes, where
6342 we can't yet emit instructions directly in the final
6343 insn stream. Queue the insns on the entry edge, they will
6344 be committed after everything else is expanded. */
6345 insert_insn_on_edge (seq,
6346 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6353 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6355 if (GET_CODE (orig) == SYMBOL_REF
6356 || GET_CODE (orig) == LABEL_REF)
6358 rtx insn;
6360 if (reg == 0)
6362 gcc_assert (can_create_pseudo_p ());
6363 reg = gen_reg_rtx (Pmode);
6366 /* VxWorks does not impose a fixed gap between segments; the run-time
6367 gap can be different from the object-file gap. We therefore can't
6368 use GOTOFF unless we are absolutely sure that the symbol is in the
6369 same segment as the GOT. Unfortunately, the flexibility of linker
6370 scripts means that we can't be sure of that in general, so assume
6371 that GOTOFF is never valid on VxWorks. */
6372 if ((GET_CODE (orig) == LABEL_REF
6373 || (GET_CODE (orig) == SYMBOL_REF &&
6374 SYMBOL_REF_LOCAL_P (orig)))
6375 && NEED_GOT_RELOC
6376 && arm_pic_data_is_text_relative)
6377 insn = arm_pic_static_addr (orig, reg);
6378 else
6380 rtx pat;
6381 rtx mem;
6383 /* If this function doesn't have a pic register, create one now. */
6384 require_pic_register ();
6386 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6388 /* Make the MEM as close to a constant as possible. */
6389 mem = SET_SRC (pat);
6390 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6391 MEM_READONLY_P (mem) = 1;
6392 MEM_NOTRAP_P (mem) = 1;
6394 insn = emit_insn (pat);
6397 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6398 by loop. */
6399 set_unique_reg_note (insn, REG_EQUAL, orig);
6401 return reg;
6403 else if (GET_CODE (orig) == CONST)
6405 rtx base, offset;
6407 if (GET_CODE (XEXP (orig, 0)) == PLUS
6408 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6409 return orig;
6411 /* Handle the case where we have: const (UNSPEC_TLS). */
6412 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6413 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6414 return orig;
6416 /* Handle the case where we have:
6417 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6418 CONST_INT. */
6419 if (GET_CODE (XEXP (orig, 0)) == PLUS
6420 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6421 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6423 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6424 return orig;
6427 if (reg == 0)
6429 gcc_assert (can_create_pseudo_p ());
6430 reg = gen_reg_rtx (Pmode);
6433 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6435 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6436 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6437 base == reg ? 0 : reg);
6439 if (CONST_INT_P (offset))
6441 /* The base register doesn't really matter, we only want to
6442 test the index for the appropriate mode. */
6443 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6445 gcc_assert (can_create_pseudo_p ());
6446 offset = force_reg (Pmode, offset);
6449 if (CONST_INT_P (offset))
6450 return plus_constant (Pmode, base, INTVAL (offset));
6453 if (GET_MODE_SIZE (mode) > 4
6454 && (GET_MODE_CLASS (mode) == MODE_INT
6455 || TARGET_SOFT_FLOAT))
6457 emit_insn (gen_addsi3 (reg, base, offset));
6458 return reg;
6461 return gen_rtx_PLUS (Pmode, base, offset);
6464 return orig;
6468 /* Find a spare register to use during the prolog of a function. */
6470 static int
6471 thumb_find_work_register (unsigned long pushed_regs_mask)
6473 int reg;
6475 /* Check the argument registers first as these are call-used. The
6476 register allocation order means that sometimes r3 might be used
6477 but earlier argument registers might not, so check them all. */
6478 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6479 if (!df_regs_ever_live_p (reg))
6480 return reg;
6482 /* Before going on to check the call-saved registers we can try a couple
6483 more ways of deducing that r3 is available. The first is when we are
6484 pushing anonymous arguments onto the stack and we have less than 4
6485 registers worth of fixed arguments(*). In this case r3 will be part of
6486 the variable argument list and so we can be sure that it will be
6487 pushed right at the start of the function. Hence it will be available
6488 for the rest of the prologue.
6489 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6490 if (cfun->machine->uses_anonymous_args
6491 && crtl->args.pretend_args_size > 0)
6492 return LAST_ARG_REGNUM;
6494 /* The other case is when we have fixed arguments but less than 4 registers
6495 worth. In this case r3 might be used in the body of the function, but
6496 it is not being used to convey an argument into the function. In theory
6497 we could just check crtl->args.size to see how many bytes are
6498 being passed in argument registers, but it seems that it is unreliable.
6499 Sometimes it will have the value 0 when in fact arguments are being
6500 passed. (See testcase execute/20021111-1.c for an example). So we also
6501 check the args_info.nregs field as well. The problem with this field is
6502 that it makes no allowances for arguments that are passed to the
6503 function but which are not used. Hence we could miss an opportunity
6504 when a function has an unused argument in r3. But it is better to be
6505 safe than to be sorry. */
6506 if (! cfun->machine->uses_anonymous_args
6507 && crtl->args.size >= 0
6508 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6509 && (TARGET_AAPCS_BASED
6510 ? crtl->args.info.aapcs_ncrn < 4
6511 : crtl->args.info.nregs < 4))
6512 return LAST_ARG_REGNUM;
6514 /* Otherwise look for a call-saved register that is going to be pushed. */
6515 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6516 if (pushed_regs_mask & (1 << reg))
6517 return reg;
6519 if (TARGET_THUMB2)
6521 /* Thumb-2 can use high regs. */
6522 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6523 if (pushed_regs_mask & (1 << reg))
6524 return reg;
6526 /* Something went wrong - thumb_compute_save_reg_mask()
6527 should have arranged for a suitable register to be pushed. */
6528 gcc_unreachable ();
6531 static GTY(()) int pic_labelno;
6533 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6534 low register. */
6536 void
6537 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6539 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6541 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6542 return;
6544 gcc_assert (flag_pic);
6546 pic_reg = cfun->machine->pic_reg;
6547 if (TARGET_VXWORKS_RTP)
6549 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6550 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6551 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6553 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6555 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6556 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6558 else
6560 /* We use an UNSPEC rather than a LABEL_REF because this label
6561 never appears in the code stream. */
6563 labelno = GEN_INT (pic_labelno++);
6564 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6565 l1 = gen_rtx_CONST (VOIDmode, l1);
6567 /* On the ARM the PC register contains 'dot + 8' at the time of the
6568 addition, on the Thumb it is 'dot + 4'. */
6569 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6570 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6571 UNSPEC_GOTSYM_OFF);
6572 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6574 if (TARGET_32BIT)
6576 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6578 else /* TARGET_THUMB1 */
6580 if (arm_pic_register != INVALID_REGNUM
6581 && REGNO (pic_reg) > LAST_LO_REGNUM)
6583 /* We will have pushed the pic register, so we should always be
6584 able to find a work register. */
6585 pic_tmp = gen_rtx_REG (SImode,
6586 thumb_find_work_register (saved_regs));
6587 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6588 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6589 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6591 else if (arm_pic_register != INVALID_REGNUM
6592 && arm_pic_register > LAST_LO_REGNUM
6593 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6595 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6596 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6597 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6599 else
6600 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6604 /* Need to emit this whether or not we obey regdecls,
6605 since setjmp/longjmp can cause life info to screw up. */
6606 emit_use (pic_reg);
6609 /* Generate code to load the address of a static var when flag_pic is set. */
6610 static rtx
6611 arm_pic_static_addr (rtx orig, rtx reg)
6613 rtx l1, labelno, offset_rtx, insn;
6615 gcc_assert (flag_pic);
6617 /* We use an UNSPEC rather than a LABEL_REF because this label
6618 never appears in the code stream. */
6619 labelno = GEN_INT (pic_labelno++);
6620 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6621 l1 = gen_rtx_CONST (VOIDmode, l1);
6623 /* On the ARM the PC register contains 'dot + 8' at the time of the
6624 addition, on the Thumb it is 'dot + 4'. */
6625 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6626 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6627 UNSPEC_SYMBOL_OFFSET);
6628 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6630 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6631 return insn;
6634 /* Return nonzero if X is valid as an ARM state addressing register. */
6635 static int
6636 arm_address_register_rtx_p (rtx x, int strict_p)
6638 int regno;
6640 if (!REG_P (x))
6641 return 0;
6643 regno = REGNO (x);
6645 if (strict_p)
6646 return ARM_REGNO_OK_FOR_BASE_P (regno);
6648 return (regno <= LAST_ARM_REGNUM
6649 || regno >= FIRST_PSEUDO_REGISTER
6650 || regno == FRAME_POINTER_REGNUM
6651 || regno == ARG_POINTER_REGNUM);
6654 /* Return TRUE if this rtx is the difference of a symbol and a label,
6655 and will reduce to a PC-relative relocation in the object file.
6656 Expressions like this can be left alone when generating PIC, rather
6657 than forced through the GOT. */
6658 static int
6659 pcrel_constant_p (rtx x)
6661 if (GET_CODE (x) == MINUS)
6662 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6664 return FALSE;
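/* For example (illustrative RTL), pcrel_constant_p is true for

       (minus (symbol_ref ("sym")) (label_ref L))

   which the assembler can resolve to a PC-relative value, so such a
   constant does not need to be forced through the GOT when generating
   PIC code.  */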
6667 /* Return true if X will surely end up in an index register after the next
6668 splitting pass. */
6669 static bool
6670 will_be_in_index_register (const_rtx x)
6672 /* arm.md: calculate_pic_address will split this into a register. */
6673 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6676 /* Return nonzero if X is a valid ARM state address operand. */
6678 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6679 int strict_p)
6681 bool use_ldrd;
6682 enum rtx_code code = GET_CODE (x);
6684 if (arm_address_register_rtx_p (x, strict_p))
6685 return 1;
6687 use_ldrd = (TARGET_LDRD
6688 && (mode == DImode
6689 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6691 if (code == POST_INC || code == PRE_DEC
6692 || ((code == PRE_INC || code == POST_DEC)
6693 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6694 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6696 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6697 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6698 && GET_CODE (XEXP (x, 1)) == PLUS
6699 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6701 rtx addend = XEXP (XEXP (x, 1), 1);
6703 /* Don't allow ldrd post increment by register because it's hard
6704 to fixup invalid register choices. */
6705 if (use_ldrd
6706 && GET_CODE (x) == POST_MODIFY
6707 && REG_P (addend))
6708 return 0;
6710 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6711 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6714 /* After reload constants split into minipools will have addresses
6715 from a LABEL_REF. */
6716 else if (reload_completed
6717 && (code == LABEL_REF
6718 || (code == CONST
6719 && GET_CODE (XEXP (x, 0)) == PLUS
6720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6721 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6722 return 1;
6724 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6725 return 0;
6727 else if (code == PLUS)
6729 rtx xop0 = XEXP (x, 0);
6730 rtx xop1 = XEXP (x, 1);
6732 return ((arm_address_register_rtx_p (xop0, strict_p)
6733 && ((CONST_INT_P (xop1)
6734 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6735 || (!strict_p && will_be_in_index_register (xop1))))
6736 || (arm_address_register_rtx_p (xop1, strict_p)
6737 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6740 #if 0
6741 /* Reload currently can't handle MINUS, so disable this for now */
6742 else if (GET_CODE (x) == MINUS)
6744 rtx xop0 = XEXP (x, 0);
6745 rtx xop1 = XEXP (x, 1);
6747 return (arm_address_register_rtx_p (xop0, strict_p)
6748 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6750 #endif
6752 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6753 && code == SYMBOL_REF
6754 && CONSTANT_POOL_ADDRESS_P (x)
6755 && ! (flag_pic
6756 && symbol_mentioned_p (get_pool_constant (x))
6757 && ! pcrel_constant_p (get_pool_constant (x))))
6758 return 1;
6760 return 0;
6763 /* Return nonzero if X is a valid Thumb-2 address operand. */
6764 static int
6765 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6767 bool use_ldrd;
6768 enum rtx_code code = GET_CODE (x);
6770 if (arm_address_register_rtx_p (x, strict_p))
6771 return 1;
6773 use_ldrd = (TARGET_LDRD
6774 && (mode == DImode
6775 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6777 if (code == POST_INC || code == PRE_DEC
6778 || ((code == PRE_INC || code == POST_DEC)
6779 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6780 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6782 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6783 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6784 && GET_CODE (XEXP (x, 1)) == PLUS
6785 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6787 /* Thumb-2 only has autoincrement by constant. */
6788 rtx addend = XEXP (XEXP (x, 1), 1);
6789 HOST_WIDE_INT offset;
6791 if (!CONST_INT_P (addend))
6792 return 0;
6794 offset = INTVAL(addend);
6795 if (GET_MODE_SIZE (mode) <= 4)
6796 return (offset > -256 && offset < 256);
6798 return (use_ldrd && offset > -1024 && offset < 1024
6799 && (offset & 3) == 0);
6802 /* After reload constants split into minipools will have addresses
6803 from a LABEL_REF. */
6804 else if (reload_completed
6805 && (code == LABEL_REF
6806 || (code == CONST
6807 && GET_CODE (XEXP (x, 0)) == PLUS
6808 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6809 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6810 return 1;
6812 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6813 return 0;
6815 else if (code == PLUS)
6817 rtx xop0 = XEXP (x, 0);
6818 rtx xop1 = XEXP (x, 1);
6820 return ((arm_address_register_rtx_p (xop0, strict_p)
6821 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6822 || (!strict_p && will_be_in_index_register (xop1))))
6823 || (arm_address_register_rtx_p (xop1, strict_p)
6824 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6827 /* Normally we can assign constant values to target registers without
6828 the help of the constant pool. But there are cases where we have to use
6829 the constant pool, such as:
6830 1) assigning a label to a register.
6831 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6833 A constant pool access of the form:
6834 (set (reg r0) (mem (symbol_ref (".LC0"))))
6835 would cause a literal pool to be used (later, in arm_reorg).
6836 So here we mark such a form as invalid; the compiler will then
6837 adjust it into:
6838 (set (reg r0) (symbol_ref (".LC0")))
6839 (set (reg r0) (mem (reg r0))).
6840 No extra register is required, and (mem (reg r0)) won't cause the use
6841 of literal pools. */
6842 else if (arm_disable_literal_pool && code == SYMBOL_REF
6843 && CONSTANT_POOL_ADDRESS_P (x))
6844 return 0;
6846 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6847 && code == SYMBOL_REF
6848 && CONSTANT_POOL_ADDRESS_P (x)
6849 && ! (flag_pic
6850 && symbol_mentioned_p (get_pool_constant (x))
6851 && ! pcrel_constant_p (get_pool_constant (x))))
6852 return 1;
6854 return 0;
6857 /* Return nonzero if INDEX is valid for an address index operand in
6858 ARM state. */
6859 static int
6860 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6861 int strict_p)
6863 HOST_WIDE_INT range;
6864 enum rtx_code code = GET_CODE (index);
6866 /* Standard coprocessor addressing modes. */
6867 if (TARGET_HARD_FLOAT
6868 && TARGET_VFP
6869 && (mode == SFmode || mode == DFmode))
6870 return (code == CONST_INT && INTVAL (index) < 1024
6871 && INTVAL (index) > -1024
6872 && (INTVAL (index) & 3) == 0);
6874 /* For quad modes, we restrict the constant offset to be slightly less
6875 than what the instruction format permits. We do this because for
6876 quad mode moves, we will actually decompose them into two separate
6877 double-mode reads or writes. INDEX must therefore be a valid
6878 (double-mode) offset and so should INDEX+8. */
6879 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6880 return (code == CONST_INT
6881 && INTVAL (index) < 1016
6882 && INTVAL (index) > -1024
6883 && (INTVAL (index) & 3) == 0);
6885 /* We have no such constraint on double mode offsets, so we permit the
6886 full range of the instruction format. */
6887 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6888 return (code == CONST_INT
6889 && INTVAL (index) < 1024
6890 && INTVAL (index) > -1024
6891 && (INTVAL (index) & 3) == 0);
6893 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6894 return (code == CONST_INT
6895 && INTVAL (index) < 1024
6896 && INTVAL (index) > -1024
6897 && (INTVAL (index) & 3) == 0);
6899 if (arm_address_register_rtx_p (index, strict_p)
6900 && (GET_MODE_SIZE (mode) <= 4))
6901 return 1;
6903 if (mode == DImode || mode == DFmode)
6905 if (code == CONST_INT)
6907 HOST_WIDE_INT val = INTVAL (index);
6909 if (TARGET_LDRD)
6910 return val > -256 && val < 256;
6911 else
6912 return val > -4096 && val < 4092;
6915 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6918 if (GET_MODE_SIZE (mode) <= 4
6919 && ! (arm_arch4
6920 && (mode == HImode
6921 || mode == HFmode
6922 || (mode == QImode && outer == SIGN_EXTEND))))
6924 if (code == MULT)
6926 rtx xiop0 = XEXP (index, 0);
6927 rtx xiop1 = XEXP (index, 1);
6929 return ((arm_address_register_rtx_p (xiop0, strict_p)
6930 && power_of_two_operand (xiop1, SImode))
6931 || (arm_address_register_rtx_p (xiop1, strict_p)
6932 && power_of_two_operand (xiop0, SImode)));
6934 else if (code == LSHIFTRT || code == ASHIFTRT
6935 || code == ASHIFT || code == ROTATERT)
6937 rtx op = XEXP (index, 1);
6939 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6940 && CONST_INT_P (op)
6941 && INTVAL (op) > 0
6942 && INTVAL (op) <= 31);
6946 /* For ARM v4 we may be doing a sign-extend operation during the
6947 load. */
6948 if (arm_arch4)
6950 if (mode == HImode
6951 || mode == HFmode
6952 || (outer == SIGN_EXTEND && mode == QImode))
6953 range = 256;
6954 else
6955 range = 4096;
6957 else
6958 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6960 return (code == CONST_INT
6961 && INTVAL (index) < range
6962 && INTVAL (index) > -range);
6965 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6966 index operand. i.e. 1, 2, 4 or 8. */
6967 static bool
6968 thumb2_index_mul_operand (rtx op)
6970 HOST_WIDE_INT val;
6972 if (!CONST_INT_P (op))
6973 return false;
6975 val = INTVAL(op);
6976 return (val == 1 || val == 2 || val == 4 || val == 8);
6979 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6980 static int
6981 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6983 enum rtx_code code = GET_CODE (index);
6985 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6986 /* Standard coprocessor addressing modes. */
6987 if (TARGET_HARD_FLOAT
6988 && TARGET_VFP
6989 && (mode == SFmode || mode == DFmode))
6990 return (code == CONST_INT && INTVAL (index) < 1024
6991 /* Thumb-2 allows only a > -256 index range for its core register
6992 load/stores. Since we allow SF/DF in core registers, we have
6993 to use the intersection between -256~4096 (core) and -1024~1024
6994 (coprocessor). */
6995 && INTVAL (index) > -256
6996 && (INTVAL (index) & 3) == 0);
6998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7000 /* For DImode assume values will usually live in core regs
7001 and only allow LDRD addressing modes. */
7002 if (!TARGET_LDRD || mode != DImode)
7003 return (code == CONST_INT
7004 && INTVAL (index) < 1024
7005 && INTVAL (index) > -1024
7006 && (INTVAL (index) & 3) == 0);
7009 /* For quad modes, we restrict the constant offset to be slightly less
7010 than what the instruction format permits. We do this because for
7011 quad mode moves, we will actually decompose them into two separate
7012 double-mode reads or writes. INDEX must therefore be a valid
7013 (double-mode) offset and so should INDEX+8. */
7014 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7015 return (code == CONST_INT
7016 && INTVAL (index) < 1016
7017 && INTVAL (index) > -1024
7018 && (INTVAL (index) & 3) == 0);
7020 /* We have no such constraint on double mode offsets, so we permit the
7021 full range of the instruction format. */
7022 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7023 return (code == CONST_INT
7024 && INTVAL (index) < 1024
7025 && INTVAL (index) > -1024
7026 && (INTVAL (index) & 3) == 0);
7028 if (arm_address_register_rtx_p (index, strict_p)
7029 && (GET_MODE_SIZE (mode) <= 4))
7030 return 1;
7032 if (mode == DImode || mode == DFmode)
7034 if (code == CONST_INT)
7036 HOST_WIDE_INT val = INTVAL (index);
7037 /* ??? Can we assume ldrd for thumb2? */
7038 /* Thumb-2 ldrd only has reg+const addressing modes. */
7039 /* ldrd supports offsets of +-1020.
7040 However the ldr fallback does not. */
7041 return val > -256 && val < 256 && (val & 3) == 0;
7043 else
7044 return 0;
7047 if (code == MULT)
7049 rtx xiop0 = XEXP (index, 0);
7050 rtx xiop1 = XEXP (index, 1);
7052 return ((arm_address_register_rtx_p (xiop0, strict_p)
7053 && thumb2_index_mul_operand (xiop1))
7054 || (arm_address_register_rtx_p (xiop1, strict_p)
7055 && thumb2_index_mul_operand (xiop0)));
7057 else if (code == ASHIFT)
7059 rtx op = XEXP (index, 1);
7061 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7062 && CONST_INT_P (op)
7063 && INTVAL (op) > 0
7064 && INTVAL (op) <= 3);
7067 return (code == CONST_INT
7068 && INTVAL (index) < 4096
7069 && INTVAL (index) > -256);
7072 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7073 static int
7074 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7076 int regno;
7078 if (!REG_P (x))
7079 return 0;
7081 regno = REGNO (x);
7083 if (strict_p)
7084 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7086 return (regno <= LAST_LO_REGNUM
7087 || regno > LAST_VIRTUAL_REGISTER
7088 || regno == FRAME_POINTER_REGNUM
7089 || (GET_MODE_SIZE (mode) >= 4
7090 && (regno == STACK_POINTER_REGNUM
7091 || regno >= FIRST_PSEUDO_REGISTER
7092 || x == hard_frame_pointer_rtx
7093 || x == arg_pointer_rtx)));
7096 /* Return nonzero if x is a legitimate index register. This is the case
7097 for any base register that can access a QImode object. */
7098 inline static int
7099 thumb1_index_register_rtx_p (rtx x, int strict_p)
7101 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7104 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7106 The AP may be eliminated to either the SP or the FP, so we use the
7107 least common denominator, e.g. SImode, and offsets from 0 to 64.
7109 ??? Verify whether the above is the right approach.
7111 ??? Also, the FP may be eliminated to the SP, so perhaps that
7112 needs special handling also.
7114 ??? Look at how the mips16 port solves this problem. It probably uses
7115 better ways to solve some of these problems.
7117 Although it is not incorrect, we don't accept QImode and HImode
7118 addresses based on the frame pointer or arg pointer until the
7119 reload pass starts. This is so that eliminating such addresses
7120 into stack based ones won't produce impossible code. */
7122 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7124 /* ??? Not clear if this is right. Experiment. */
7125 if (GET_MODE_SIZE (mode) < 4
7126 && !(reload_in_progress || reload_completed)
7127 && (reg_mentioned_p (frame_pointer_rtx, x)
7128 || reg_mentioned_p (arg_pointer_rtx, x)
7129 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7130 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7131 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7132 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7133 return 0;
7135 /* Accept any base register. SP only in SImode or larger. */
7136 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7137 return 1;
7139 /* This is PC relative data before arm_reorg runs. */
7140 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7141 && GET_CODE (x) == SYMBOL_REF
7142 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7143 return 1;
7145 /* This is PC relative data after arm_reorg runs. */
7146 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7147 && reload_completed
7148 && (GET_CODE (x) == LABEL_REF
7149 || (GET_CODE (x) == CONST
7150 && GET_CODE (XEXP (x, 0)) == PLUS
7151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7152 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7153 return 1;
7155 /* Post-inc indexing only supported for SImode and larger. */
7156 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7157 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7158 return 1;
7160 else if (GET_CODE (x) == PLUS)
7162 /* REG+REG address can be any two index registers. */
7163 /* We disallow FRAME+REG addressing since we know that FRAME
7164 will be replaced with STACK, and SP relative addressing only
7165 permits SP+OFFSET. */
7166 if (GET_MODE_SIZE (mode) <= 4
7167 && XEXP (x, 0) != frame_pointer_rtx
7168 && XEXP (x, 1) != frame_pointer_rtx
7169 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7170 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7171 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7172 return 1;
7174 /* REG+const has 5-7 bit offset for non-SP registers. */
7175 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7176 || XEXP (x, 0) == arg_pointer_rtx)
7177 && CONST_INT_P (XEXP (x, 1))
7178 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7179 return 1;
7181 /* REG+const has 10-bit offset for SP, but only SImode and
7182 larger are supported. */
7183 /* ??? Should probably check for DI/DFmode overflow here
7184 just like GO_IF_LEGITIMATE_OFFSET does. */
7185 else if (REG_P (XEXP (x, 0))
7186 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7187 && GET_MODE_SIZE (mode) >= 4
7188 && CONST_INT_P (XEXP (x, 1))
7189 && INTVAL (XEXP (x, 1)) >= 0
7190 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7192 return 1;
7194 else if (REG_P (XEXP (x, 0))
7195 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7196 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7197 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7198 && REGNO (XEXP (x, 0))
7199 <= LAST_VIRTUAL_POINTER_REGISTER))
7200 && GET_MODE_SIZE (mode) >= 4
7201 && CONST_INT_P (XEXP (x, 1))
7202 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7203 return 1;
7206 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7207 && GET_MODE_SIZE (mode) == 4
7208 && GET_CODE (x) == SYMBOL_REF
7209 && CONSTANT_POOL_ADDRESS_P (x)
7210 && ! (flag_pic
7211 && symbol_mentioned_p (get_pool_constant (x))
7212 && ! pcrel_constant_p (get_pool_constant (x))))
7213 return 1;
7215 return 0;
7218 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7219 instruction of mode MODE. */
7221 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7223 switch (GET_MODE_SIZE (mode))
7225 case 1:
7226 return val >= 0 && val < 32;
7228 case 2:
7229 return val >= 0 && val < 64 && (val & 1) == 0;
7231 default:
7232 return (val >= 0
7233 && (val + GET_MODE_SIZE (mode)) <= 128
7234 && (val & 3) == 0);
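/* The ranges above work out as follows (derived from the code; VAL is
   a byte offset):

       byte accesses:        0 <= VAL <= 31
       halfword accesses:    0 <= VAL <= 62, VAL even
       word and larger:      0 <= VAL <= 128 - size, VAL a multiple of 4  */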
7238 bool
7239 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7241 if (TARGET_ARM)
7242 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7243 else if (TARGET_THUMB2)
7244 return thumb2_legitimate_address_p (mode, x, strict_p);
7245 else /* if (TARGET_THUMB1) */
7246 return thumb1_legitimate_address_p (mode, x, strict_p);
7249 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7251 Given an rtx X being reloaded into a reg required to be
7252 in class CLASS, return the class of reg to actually use.
7253 In general this is just CLASS, but for the Thumb core registers and
7254 immediate constants we prefer a LO_REGS class or a subset. */
7256 static reg_class_t
7257 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7259 if (TARGET_32BIT)
7260 return rclass;
7261 else
7263 if (rclass == GENERAL_REGS)
7264 return LO_REGS;
7265 else
7266 return rclass;
7270 /* Build the SYMBOL_REF for __tls_get_addr. */
7272 static GTY(()) rtx tls_get_addr_libfunc;
7274 static rtx
7275 get_tls_get_addr (void)
7277 if (!tls_get_addr_libfunc)
7278 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7279 return tls_get_addr_libfunc;
7283 arm_load_tp (rtx target)
7285 if (!target)
7286 target = gen_reg_rtx (SImode);
7288 if (TARGET_HARD_TP)
7290 /* Can return in any reg. */
7291 emit_insn (gen_load_tp_hard (target));
7293 else
7295 /* Always returned in r0. Immediately copy the result into a pseudo,
7296 otherwise other uses of r0 (e.g. setting up function arguments) may
7297 clobber the value. */
7299 rtx tmp;
7301 emit_insn (gen_load_tp_soft ());
7303 tmp = gen_rtx_REG (SImode, 0);
7304 emit_move_insn (target, tmp);
7306 return target;
7309 static rtx
7310 load_tls_operand (rtx x, rtx reg)
7312 rtx tmp;
7314 if (reg == NULL_RTX)
7315 reg = gen_reg_rtx (SImode);
7317 tmp = gen_rtx_CONST (SImode, x);
7319 emit_move_insn (reg, tmp);
7321 return reg;
7324 static rtx
7325 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7327 rtx insns, label, labelno, sum;
7329 gcc_assert (reloc != TLS_DESCSEQ);
7330 start_sequence ();
7332 labelno = GEN_INT (pic_labelno++);
7333 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7334 label = gen_rtx_CONST (VOIDmode, label);
7336 sum = gen_rtx_UNSPEC (Pmode,
7337 gen_rtvec (4, x, GEN_INT (reloc), label,
7338 GEN_INT (TARGET_ARM ? 8 : 4)),
7339 UNSPEC_TLS);
7340 reg = load_tls_operand (sum, reg);
7342 if (TARGET_ARM)
7343 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7344 else
7345 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7347 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7348 LCT_PURE, /* LCT_CONST? */
7349 Pmode, 1, reg, Pmode);
7351 insns = get_insns ();
7352 end_sequence ();
7354 return insns;
7357 static rtx
7358 arm_tls_descseq_addr (rtx x, rtx reg)
7360 rtx labelno = GEN_INT (pic_labelno++);
7361 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7362 rtx sum = gen_rtx_UNSPEC (Pmode,
7363 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7364 gen_rtx_CONST (VOIDmode, label),
7365 GEN_INT (!TARGET_ARM)),
7366 UNSPEC_TLS);
7367 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7369 emit_insn (gen_tlscall (x, labelno));
7370 if (!reg)
7371 reg = gen_reg_rtx (SImode);
7372 else
7373 gcc_assert (REGNO (reg) != 0);
7375 emit_move_insn (reg, reg0);
7377 return reg;
7381 legitimize_tls_address (rtx x, rtx reg)
7383 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7384 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7386 switch (model)
7388 case TLS_MODEL_GLOBAL_DYNAMIC:
7389 if (TARGET_GNU2_TLS)
7391 reg = arm_tls_descseq_addr (x, reg);
7393 tp = arm_load_tp (NULL_RTX);
7395 dest = gen_rtx_PLUS (Pmode, tp, reg);
7397 else
7399 /* Original scheme */
7400 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7401 dest = gen_reg_rtx (Pmode);
7402 emit_libcall_block (insns, dest, ret, x);
7404 return dest;
7406 case TLS_MODEL_LOCAL_DYNAMIC:
7407 if (TARGET_GNU2_TLS)
7409 reg = arm_tls_descseq_addr (x, reg);
7411 tp = arm_load_tp (NULL_RTX);
7413 dest = gen_rtx_PLUS (Pmode, tp, reg);
7415 else
7417 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7419 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7420 share the LDM result with other LD model accesses. */
7421 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7422 UNSPEC_TLS);
7423 dest = gen_reg_rtx (Pmode);
7424 emit_libcall_block (insns, dest, ret, eqv);
7426 /* Load the addend. */
7427 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7428 GEN_INT (TLS_LDO32)),
7429 UNSPEC_TLS);
7430 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7431 dest = gen_rtx_PLUS (Pmode, dest, addend);
7433 return dest;
7435 case TLS_MODEL_INITIAL_EXEC:
7436 labelno = GEN_INT (pic_labelno++);
7437 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7438 label = gen_rtx_CONST (VOIDmode, label);
7439 sum = gen_rtx_UNSPEC (Pmode,
7440 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7441 GEN_INT (TARGET_ARM ? 8 : 4)),
7442 UNSPEC_TLS);
7443 reg = load_tls_operand (sum, reg);
7445 if (TARGET_ARM)
7446 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7447 else if (TARGET_THUMB2)
7448 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7449 else
7451 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7452 emit_move_insn (reg, gen_const_mem (SImode, reg));
7455 tp = arm_load_tp (NULL_RTX);
7457 return gen_rtx_PLUS (Pmode, tp, reg);
7459 case TLS_MODEL_LOCAL_EXEC:
7460 tp = arm_load_tp (NULL_RTX);
7462 reg = gen_rtx_UNSPEC (Pmode,
7463 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7464 UNSPEC_TLS);
7465 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7467 return gen_rtx_PLUS (Pmode, tp, reg);
7469 default:
7470 abort ();
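/* Summary of the cases above: global- and local-dynamic either use the
   GNU2 descriptor sequence followed by a thread-pointer add, or fall back
   to a __tls_get_addr libcall (local-dynamic additionally adds the LDO
   addend); initial-exec loads the symbol's offset from memory and adds
   the thread pointer; local-exec simply adds a TPOFF constant to the
   thread pointer.  */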
7474 /* Try machine-dependent ways of modifying an illegitimate address
7475 to be legitimate. If we find one, return the new, valid address. */
7477 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7479 if (arm_tls_referenced_p (x))
7481 rtx addend = NULL;
7483 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7485 addend = XEXP (XEXP (x, 0), 1);
7486 x = XEXP (XEXP (x, 0), 0);
7489 if (GET_CODE (x) != SYMBOL_REF)
7490 return x;
7492 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7494 x = legitimize_tls_address (x, NULL_RTX);
7496 if (addend)
7498 x = gen_rtx_PLUS (SImode, x, addend);
7499 orig_x = x;
7501 else
7502 return x;
7505 if (!TARGET_ARM)
7507 /* TODO: legitimize_address for Thumb2. */
7508 if (TARGET_THUMB2)
7509 return x;
7510 return thumb_legitimize_address (x, orig_x, mode);
7513 if (GET_CODE (x) == PLUS)
7515 rtx xop0 = XEXP (x, 0);
7516 rtx xop1 = XEXP (x, 1);
7518 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7519 xop0 = force_reg (SImode, xop0);
7521 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7522 && !symbol_mentioned_p (xop1))
7523 xop1 = force_reg (SImode, xop1);
7525 if (ARM_BASE_REGISTER_RTX_P (xop0)
7526 && CONST_INT_P (xop1))
7528 HOST_WIDE_INT n, low_n;
7529 rtx base_reg, val;
7530 n = INTVAL (xop1);
7532 /* VFP addressing modes actually allow greater offsets, but for
7533 now we just stick with the lowest common denominator. */
7534 if (mode == DImode
7535 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7537 low_n = n & 0x0f;
7538 n &= ~0x0f;
7539 if (low_n > 4)
7541 n += 16;
7542 low_n -= 16;
7545 else
7547 low_n = ((mode) == TImode ? 0
7548 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7549 n -= low_n;
7552 base_reg = gen_reg_rtx (SImode);
7553 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7554 emit_move_insn (base_reg, val);
7555 x = plus_constant (Pmode, base_reg, low_n);
7557 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7558 x = gen_rtx_PLUS (SImode, xop0, xop1);
7561 /* XXX We don't allow MINUS any more -- see comment in
7562 arm_legitimate_address_outer_p (). */
7563 else if (GET_CODE (x) == MINUS)
7565 rtx xop0 = XEXP (x, 0);
7566 rtx xop1 = XEXP (x, 1);
7568 if (CONSTANT_P (xop0))
7569 xop0 = force_reg (SImode, xop0);
7571 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7572 xop1 = force_reg (SImode, xop1);
7574 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7575 x = gen_rtx_MINUS (SImode, xop0, xop1);
7578 /* Make sure to take full advantage of the pre-indexed addressing mode
7579 with absolute addresses, which often allows the base register to be
7580 factored out across multiple adjacent memory references, and it might
7581 even allow the minipool to be avoided entirely. */
7582 else if (CONST_INT_P (x) && optimize > 0)
7584 unsigned int bits;
7585 HOST_WIDE_INT mask, base, index;
7586 rtx base_reg;
7588 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7589 use an 8-bit index. So let's use a 12-bit index for SImode only and
7590 hope that arm_gen_constant will enable ldrb to use more bits. */
7591 bits = (mode == SImode) ? 12 : 8;
7592 mask = (1 << bits) - 1;
7593 base = INTVAL (x) & ~mask;
7594 index = INTVAL (x) & mask;
7595 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7597 /* It'll most probably be more efficient to generate the base
7598 with more bits set and use a negative index instead. */
7599 base |= mask;
7600 index -= mask;
7602 base_reg = force_reg (SImode, GEN_INT (base));
7603 x = plus_constant (Pmode, base_reg, index);
7606 if (flag_pic)
7608 /* We need to find and carefully transform any SYMBOL and LABEL
7609 references, so go back to the original address expression. */
7610 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7612 if (new_x != orig_x)
7613 x = new_x;
7616 return x;
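/* Illustrative example (hypothetical address): legitimizing the constant
   address 0x20003004 for an SImode access takes the CONST_INT branch
   above with bits == 12, splitting it into base == 0x20003000 and
   index == 4; only three bits are set in the base, so it is left alone,
   forced into a register, and the access becomes [base_reg, #4], letting
   adjacent references share base_reg.  */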
7620 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7621 to be legitimate. If we find one, return the new, valid address. */
7623 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7625 if (GET_CODE (x) == PLUS
7626 && CONST_INT_P (XEXP (x, 1))
7627 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7628 || INTVAL (XEXP (x, 1)) < 0))
7630 rtx xop0 = XEXP (x, 0);
7631 rtx xop1 = XEXP (x, 1);
7632 HOST_WIDE_INT offset = INTVAL (xop1);
7634 /* Try to fold the offset into a biasing of the base register and
7635 then offsetting that. Don't do this when optimizing for space
7636 since it can cause too many CSEs. */
7637 if (optimize_size && offset >= 0
7638 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7640 HOST_WIDE_INT delta;
7642 if (offset >= 256)
7643 delta = offset - (256 - GET_MODE_SIZE (mode));
7644 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7645 delta = 31 * GET_MODE_SIZE (mode);
7646 else
7647 delta = offset & (~31 * GET_MODE_SIZE (mode));
7649 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7650 NULL_RTX);
7651 x = plus_constant (Pmode, xop0, delta);
7653 else if (offset < 0 && offset > -256)
7654 /* Small negative offsets are best done with a subtract before the
7655 dereference, since forcing these into a register normally takes two
7656 instructions. */
7657 x = force_operand (x, NULL_RTX);
7658 else
7660 /* For the remaining cases, force the constant into a register. */
7661 xop1 = force_reg (SImode, xop1);
7662 x = gen_rtx_PLUS (SImode, xop0, xop1);
7665 else if (GET_CODE (x) == PLUS
7666 && s_register_operand (XEXP (x, 1), SImode)
7667 && !s_register_operand (XEXP (x, 0), SImode))
7669 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7671 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7674 if (flag_pic)
7676 /* We need to find and carefully transform any SYMBOL and LABEL
7677 references, so go back to the original address expression. */
7678 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7680 if (new_x != orig_x)
7681 x = new_x;
7684 return x;
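/* Illustrative example (hypothetical offset): for an SImode access at
   base + 300 when optimizing for size, the offset is >= 256, so
   delta = 300 - (256 - 4) = 48; the base is biased by 252 and the final
   address becomes [biased_base, #48], both of which Thumb-1 can encode
   directly.  */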
7687 bool
7688 arm_legitimize_reload_address (rtx *p,
7689 enum machine_mode mode,
7690 int opnum, int type,
7691 int ind_levels ATTRIBUTE_UNUSED)
7693 /* We must recognize output that we have already generated ourselves. */
7694 if (GET_CODE (*p) == PLUS
7695 && GET_CODE (XEXP (*p, 0)) == PLUS
7696 && REG_P (XEXP (XEXP (*p, 0), 0))
7697 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7698 && CONST_INT_P (XEXP (*p, 1)))
7700 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7701 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7702 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7703 return true;
7706 if (GET_CODE (*p) == PLUS
7707 && REG_P (XEXP (*p, 0))
7708 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7709 /* If the base register is equivalent to a constant, let the generic
7710 code handle it. Otherwise we will run into problems if a future
7711 reload pass decides to rematerialize the constant. */
7712 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7713 && CONST_INT_P (XEXP (*p, 1)))
7715 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7716 HOST_WIDE_INT low, high;
7718 /* Detect coprocessor load/stores. */
7719 bool coproc_p = ((TARGET_HARD_FLOAT
7720 && TARGET_VFP
7721 && (mode == SFmode || mode == DFmode))
7722 || (TARGET_REALLY_IWMMXT
7723 && VALID_IWMMXT_REG_MODE (mode))
7724 || (TARGET_NEON
7725 && (VALID_NEON_DREG_MODE (mode)
7726 || VALID_NEON_QREG_MODE (mode))));
7728 /* For some conditions, bail out when lower two bits are unaligned. */
7729 if ((val & 0x3) != 0
7730 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7731 && (coproc_p
7732 /* For DI, and DF under soft-float: */
7733 || ((mode == DImode || mode == DFmode)
7734 /* Without ldrd, we use stm/ldm, which does not
7735 fare well with unaligned bits. */
7736 && (! TARGET_LDRD
7737 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7738 || TARGET_THUMB2))))
7739 return false;
7741 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7742 where the (reg+high) part gets turned into a reload add insn,
7743 we try to decompose the index into high/low values that can often
7744 also lead to better reload CSE.
7745 For example:
7746 ldr r0, [r2, #4100] // Offset too large
7747 ldr r1, [r2, #4104] // Offset too large
7749 is best reloaded as:
7750 add t1, r2, #4096
7751 ldr r0, [t1, #4]
7752 add t2, r2, #4096
7753 ldr r1, [t2, #8]
7755 which post-reload CSE can simplify in most cases to eliminate the
7756 second add instruction:
7757 add t1, r2, #4096
7758 ldr r0, [t1, #4]
7759 ldr r1, [t1, #8]
7761 The idea here is that we want to split out the bits of the constant
7762 as a mask, rather than by subtracting the maximum offset that the
7763 respective type of load/store can handle.
7765 A negative low part can still be useful even when the overall offset
7766 is positive; sometimes this leads to an immediate that can be
7767 constructed with fewer instructions.
7768 For example:
7769 ldr r0, [r2, #0x3FFFFC]
7771 This is best reloaded as:
7772 add t1, r2, #0x400000
7773 ldr r0, [t1, #-4]
7775 The trick for spotting this for a load insn with N bits of offset
7776 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7777 negative offset that is going to make bit N and all the bits below
7778 it become zero in the remainder part.
7780 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7781 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7782 used in most cases of ARM load/store instructions. */
7784 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7785 (((VAL) & ((1 << (N)) - 1)) \
7786 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7787 : 0)
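/* For illustration: with the 0x3FFFFC offset from the example above and
   N == 12, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
     = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
     = (0x1FFC ^ 0x1000) - 0x1000
     = -4,
   leaving a high part of 0x400000, i.e. exactly the
     add t1, r2, #0x400000 / ldr r0, [t1, #-4]
   sequence shown above.  When bit N is clear the macro simply returns the
   low bits unchanged, e.g. SIGN_MAG_LOW_ADDR_BITS (0x204, 8) == 4.  */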
7789 if (coproc_p)
7791 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7793 /* NEON quad-word load/stores are made of two double-word accesses,
7794 so the valid index range is reduced by 8. Treat as 9-bit range if
7795 we go over it. */
7796 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7797 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7799 else if (GET_MODE_SIZE (mode) == 8)
7801 if (TARGET_LDRD)
7802 low = (TARGET_THUMB2
7803 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7804 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7805 else
7806 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7807 to access doublewords. The supported load/store offsets are
7808 -8, -4, and 4, which we try to produce here. */
7809 low = ((val & 0xf) ^ 0x8) - 0x8;
7811 else if (GET_MODE_SIZE (mode) < 8)
7813 /* NEON element load/stores do not have an offset. */
7814 if (TARGET_NEON_FP16 && mode == HFmode)
7815 return false;
7817 if (TARGET_THUMB2)
7819 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7820 Try the wider 12-bit range first, and re-try if the result
7821 is out of range. */
7822 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7823 if (low < -255)
7824 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7826 else
7828 if (mode == HImode || mode == HFmode)
7830 if (arm_arch4)
7831 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7832 else
7834 /* The storehi/movhi_bytes fallbacks can use only
7835 [-4094,+4094] of the full ldrb/strb index range. */
7836 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7837 if (low == 4095 || low == -4095)
7838 return false;
7841 else
7842 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7845 else
7846 return false;
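/* Illustrative example (hypothetical offset): for val == 0x1804 in SImode
   on Thumb-2, the first 12-bit split above yields -2044, which is below
   the -255 limit, so the code retries with 8 bits and settles on
   low == 4, leaving high == 0x1800.  */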
7848 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7849 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7850 - (unsigned HOST_WIDE_INT) 0x80000000);
7851 /* Check for overflow or zero */
7852 if (low == 0 || high == 0 || (high + low != val))
7853 return false;
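/* The XOR/subtract pair above is the usual idiom for sign-extending the
   32-bit value (val - low) into a HOST_WIDE_INT; for instance
   (0xFFFFF000 ^ 0x80000000) - 0x80000000 == -4096.  */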
7855 /* Reload the high part into a base reg; leave the low part
7856 in the mem.
7857 Note that replacing this gen_rtx_PLUS with plus_constant is
7858 wrong in this case because we rely on the
7859 (plus (plus reg c1) c2) structure being preserved so that
7860 XEXP (*p, 0) in push_reload below uses the correct term. */
7861 *p = gen_rtx_PLUS (GET_MODE (*p),
7862 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7863 GEN_INT (high)),
7864 GEN_INT (low));
7865 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7866 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7867 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7868 return true;
7871 return false;
7875 thumb_legitimize_reload_address (rtx *x_p,
7876 enum machine_mode mode,
7877 int opnum, int type,
7878 int ind_levels ATTRIBUTE_UNUSED)
7880 rtx x = *x_p;
7882 if (GET_CODE (x) == PLUS
7883 && GET_MODE_SIZE (mode) < 4
7884 && REG_P (XEXP (x, 0))
7885 && XEXP (x, 0) == stack_pointer_rtx
7886 && CONST_INT_P (XEXP (x, 1))
7887 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7889 rtx orig_x = x;
7891 x = copy_rtx (x);
7892 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7893 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7894 return x;
7897 /* If both registers are hi-regs, then it's better to reload the
7898 entire expression rather than each register individually. That
7899 only requires one reload register rather than two. */
7900 if (GET_CODE (x) == PLUS
7901 && REG_P (XEXP (x, 0))
7902 && REG_P (XEXP (x, 1))
7903 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7904 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7906 rtx orig_x = x;
7908 x = copy_rtx (x);
7909 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7910 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7911 return x;
7914 return NULL;
7917 /* Test for various thread-local symbols. */
7919 /* Helper for arm_tls_referenced_p. */
7921 static int
7922 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7924 if (GET_CODE (*x) == SYMBOL_REF)
7925 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7927 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7928 TLS offsets, not real symbol references. */
7929 if (GET_CODE (*x) == UNSPEC
7930 && XINT (*x, 1) == UNSPEC_TLS)
7931 return -1;
7933 return 0;
7936 /* Return TRUE if X contains any TLS symbol references. */
7938 bool
7939 arm_tls_referenced_p (rtx x)
7941 if (! TARGET_HAVE_TLS)
7942 return false;
7944 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7947 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7949 On the ARM, allow any integer (invalid ones are removed later by insn
7950 patterns), nice doubles and symbol_refs which refer to the function's
7951 constant pool XXX.
7953 When generating pic allow anything. */
7955 static bool
7956 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7958 /* At present, we have no support for Neon structure constants, so forbid
7959 them here. It might be possible to handle simple cases like 0 and -1
7960 in future. */
7961 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7962 return false;
7964 return flag_pic || !label_mentioned_p (x);
7967 static bool
7968 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7970 return (CONST_INT_P (x)
7971 || CONST_DOUBLE_P (x)
7972 || CONSTANT_ADDRESS_P (x)
7973 || flag_pic);
7976 static bool
7977 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7979 return (!arm_cannot_force_const_mem (mode, x)
7980 && (TARGET_32BIT
7981 ? arm_legitimate_constant_p_1 (mode, x)
7982 : thumb_legitimate_constant_p (mode, x)));
7985 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7987 static bool
7988 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7990 rtx base, offset;
7992 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7994 split_const (x, &base, &offset);
7995 if (GET_CODE (base) == SYMBOL_REF
7996 && !offset_within_block_p (base, INTVAL (offset)))
7997 return true;
7999 return arm_tls_referenced_p (x);
8002 #define REG_OR_SUBREG_REG(X) \
8003 (REG_P (X) \
8004 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8006 #define REG_OR_SUBREG_RTX(X) \
8007 (REG_P (X) ? (X) : SUBREG_REG (X))
8009 static inline int
8010 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8012 enum machine_mode mode = GET_MODE (x);
8013 int total, words;
8015 switch (code)
8017 case ASHIFT:
8018 case ASHIFTRT:
8019 case LSHIFTRT:
8020 case ROTATERT:
8021 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8023 case PLUS:
8024 case MINUS:
8025 case COMPARE:
8026 case NEG:
8027 case NOT:
8028 return COSTS_N_INSNS (1);
8030 case MULT:
8031 if (CONST_INT_P (XEXP (x, 1)))
8033 int cycles = 0;
8034 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8036 while (i)
8038 i >>= 2;
8039 cycles++;
8041 return COSTS_N_INSNS (2) + cycles;
8043 return COSTS_N_INSNS (1) + 16;
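/* Illustrative example: a multiply by the constant 100 (0x64) walks the
   loop above four times (0x64 -> 0x19 -> 0x6 -> 0x1 -> 0), giving an
   estimate of COSTS_N_INSNS (2) + 4.  */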
8045 case SET:
8046 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8047 the mode. */
8048 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8049 return (COSTS_N_INSNS (words)
8050 + 4 * ((MEM_P (SET_SRC (x)))
8051 + MEM_P (SET_DEST (x))));
8053 case CONST_INT:
8054 if (outer == SET)
8056 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8057 return 0;
8058 if (thumb_shiftable_const (INTVAL (x)))
8059 return COSTS_N_INSNS (2);
8060 return COSTS_N_INSNS (3);
8062 else if ((outer == PLUS || outer == COMPARE)
8063 && INTVAL (x) < 256 && INTVAL (x) > -256)
8064 return 0;
8065 else if ((outer == IOR || outer == XOR || outer == AND)
8066 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8067 return COSTS_N_INSNS (1);
8068 else if (outer == AND)
8070 int i;
8071 /* This duplicates the tests in the andsi3 expander. */
8072 for (i = 9; i <= 31; i++)
8073 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8074 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8075 return COSTS_N_INSNS (2);
8077 else if (outer == ASHIFT || outer == ASHIFTRT
8078 || outer == LSHIFTRT)
8079 return 0;
8080 return COSTS_N_INSNS (2);
8082 case CONST:
8083 case CONST_DOUBLE:
8084 case LABEL_REF:
8085 case SYMBOL_REF:
8086 return COSTS_N_INSNS (3);
8088 case UDIV:
8089 case UMOD:
8090 case DIV:
8091 case MOD:
8092 return 100;
8094 case TRUNCATE:
8095 return 99;
8097 case AND:
8098 case XOR:
8099 case IOR:
8100 /* XXX guess. */
8101 return 8;
8103 case MEM:
8104 /* XXX another guess. */
8105 /* Memory costs quite a lot for the first word, but subsequent words
8106 load at the equivalent of a single insn each. */
8107 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8108 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8109 ? 4 : 0));
8111 case IF_THEN_ELSE:
8112 /* XXX a guess. */
8113 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8114 return 14;
8115 return 2;
8117 case SIGN_EXTEND:
8118 case ZERO_EXTEND:
8119 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8120 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8122 if (mode == SImode)
8123 return total;
8125 if (arm_arch6)
8126 return total + COSTS_N_INSNS (1);
8128 /* Assume a two-shift sequence. Increase the cost slightly so
8129 we prefer actual shifts over an extend operation. */
8130 return total + 1 + COSTS_N_INSNS (2);
8132 default:
8133 return 99;
8137 static inline bool
8138 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8140 enum machine_mode mode = GET_MODE (x);
8141 enum rtx_code subcode;
8142 rtx operand;
8143 enum rtx_code code = GET_CODE (x);
8144 *total = 0;
8146 switch (code)
8148 case MEM:
8149 /* Memory costs quite a lot for the first word, but subsequent words
8150 load at the equivalent of a single insn each. */
8151 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8152 return true;
8154 case DIV:
8155 case MOD:
8156 case UDIV:
8157 case UMOD:
8158 if (TARGET_HARD_FLOAT && mode == SFmode)
8159 *total = COSTS_N_INSNS (2);
8160 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8161 *total = COSTS_N_INSNS (4);
8162 else
8163 *total = COSTS_N_INSNS (20);
8164 return false;
8166 case ROTATE:
8167 if (REG_P (XEXP (x, 1)))
8168 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8169 else if (!CONST_INT_P (XEXP (x, 1)))
8170 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8172 /* Fall through */
8173 case ROTATERT:
8174 if (mode != SImode)
8176 *total += COSTS_N_INSNS (4);
8177 return true;
8180 /* Fall through */
8181 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8182 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8183 if (mode == DImode)
8185 *total += COSTS_N_INSNS (3);
8186 return true;
8189 *total += COSTS_N_INSNS (1);
8190 /* Increase the cost of complex shifts because they aren't any faster,
8191 and they reduce dual-issue opportunities. */
8192 if (arm_tune_cortex_a9
8193 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8194 ++*total;
8196 return true;
8198 case MINUS:
8199 if (mode == DImode)
8201 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8202 if (CONST_INT_P (XEXP (x, 0))
8203 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8205 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8206 return true;
8209 if (CONST_INT_P (XEXP (x, 1))
8210 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8212 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8213 return true;
8216 return false;
8219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8221 if (TARGET_HARD_FLOAT
8222 && (mode == SFmode
8223 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8225 *total = COSTS_N_INSNS (1);
8226 if (CONST_DOUBLE_P (XEXP (x, 0))
8227 && arm_const_double_rtx (XEXP (x, 0)))
8229 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8230 return true;
8233 if (CONST_DOUBLE_P (XEXP (x, 1))
8234 && arm_const_double_rtx (XEXP (x, 1)))
8236 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8237 return true;
8240 return false;
8242 *total = COSTS_N_INSNS (20);
8243 return false;
8246 *total = COSTS_N_INSNS (1);
8247 if (CONST_INT_P (XEXP (x, 0))
8248 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8250 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8251 return true;
8254 subcode = GET_CODE (XEXP (x, 1));
8255 if (subcode == ASHIFT || subcode == ASHIFTRT
8256 || subcode == LSHIFTRT
8257 || subcode == ROTATE || subcode == ROTATERT)
8259 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8260 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8261 return true;
8264 /* A shift as a part of RSB costs no more than RSB itself. */
8265 if (GET_CODE (XEXP (x, 0)) == MULT
8266 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8268 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8269 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8270 return true;
8273 if (subcode == MULT
8274 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8276 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8277 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8278 return true;
8281 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8282 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8284 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8285 if (REG_P (XEXP (XEXP (x, 1), 0))
8286 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8287 *total += COSTS_N_INSNS (1);
8289 return true;
8292 /* Fall through */
8294 case PLUS:
8295 if (code == PLUS && arm_arch6 && mode == SImode
8296 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8297 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8299 *total = COSTS_N_INSNS (1);
8300 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8301 0, speed);
8302 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8303 return true;
8306 /* MLA: All arguments must be registers. We filter out
8307 multiplication by a power of two, so that we fall down into
8308 the code below. */
8309 if (GET_CODE (XEXP (x, 0)) == MULT
8310 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8312 /* The cost comes from the cost of the multiply. */
8313 return false;
8316 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8318 if (TARGET_HARD_FLOAT
8319 && (mode == SFmode
8320 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8322 *total = COSTS_N_INSNS (1);
8323 if (CONST_DOUBLE_P (XEXP (x, 1))
8324 && arm_const_double_rtx (XEXP (x, 1)))
8326 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8327 return true;
8330 return false;
8333 *total = COSTS_N_INSNS (20);
8334 return false;
8337 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8338 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8340 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8341 if (REG_P (XEXP (XEXP (x, 0), 0))
8342 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8343 *total += COSTS_N_INSNS (1);
8344 return true;
8347 /* Fall through */
8349 case AND: case XOR: case IOR:
8351 /* Normally the frame registers will be split into reg+const during
8352 reload, so it is a bad idea to combine them with other instructions,
8353 since then they might not be moved outside of loops. As a compromise
8354 we allow integration with ops that have a constant as their second
8355 operand. */
8356 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8357 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8358 && !CONST_INT_P (XEXP (x, 1)))
8359 *total = COSTS_N_INSNS (1);
8361 if (mode == DImode)
8363 *total += COSTS_N_INSNS (2);
8364 if (CONST_INT_P (XEXP (x, 1))
8365 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8367 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8368 return true;
8371 return false;
8374 *total += COSTS_N_INSNS (1);
8375 if (CONST_INT_P (XEXP (x, 1))
8376 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8378 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8379 return true;
8381 subcode = GET_CODE (XEXP (x, 0));
8382 if (subcode == ASHIFT || subcode == ASHIFTRT
8383 || subcode == LSHIFTRT
8384 || subcode == ROTATE || subcode == ROTATERT)
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8388 return true;
8391 if (subcode == MULT
8392 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8394 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8396 return true;
8399 if (subcode == UMIN || subcode == UMAX
8400 || subcode == SMIN || subcode == SMAX)
8402 *total = COSTS_N_INSNS (3);
8403 return true;
8406 return false;
8408 case MULT:
8409 /* This should have been handled by the CPU specific routines. */
8410 gcc_unreachable ();
8412 case TRUNCATE:
8413 if (arm_arch3m && mode == SImode
8414 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8416 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8417 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8418 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8419 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8421 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8422 return true;
8424 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8425 return false;
8427 case NEG:
8428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8430 if (TARGET_HARD_FLOAT
8431 && (mode == SFmode
8432 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8434 *total = COSTS_N_INSNS (1);
8435 return false;
8437 *total = COSTS_N_INSNS (2);
8438 return false;
8441 /* Fall through */
8442 case NOT:
8443 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8444 if (mode == SImode && code == NOT)
8446 subcode = GET_CODE (XEXP (x, 0));
8447 if (subcode == ASHIFT || subcode == ASHIFTRT
8448 || subcode == LSHIFTRT
8449 || subcode == ROTATE || subcode == ROTATERT
8450 || (subcode == MULT
8451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8453 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8454 /* Register shifts cost an extra cycle. */
8455 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8456 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8457 subcode, 1, speed);
8458 return true;
8462 return false;
8464 case IF_THEN_ELSE:
8465 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8467 *total = COSTS_N_INSNS (4);
8468 return true;
8471 operand = XEXP (x, 0);
8473 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8475 && REG_P (XEXP (operand, 0))
8476 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8477 *total += COSTS_N_INSNS (1);
8478 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8479 + rtx_cost (XEXP (x, 2), code, 2, speed));
8480 return true;
8482 case NE:
8483 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8485 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8486 return true;
8488 goto scc_insn;
8490 case GE:
8491 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8492 && mode == SImode && XEXP (x, 1) == const0_rtx)
8494 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8495 return true;
8497 goto scc_insn;
8499 case LT:
8500 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8501 && mode == SImode && XEXP (x, 1) == const0_rtx)
8503 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8504 return true;
8506 goto scc_insn;
8508 case EQ:
8509 case GT:
8510 case LE:
8511 case GEU:
8512 case LTU:
8513 case GTU:
8514 case LEU:
8515 case UNORDERED:
8516 case ORDERED:
8517 case UNEQ:
8518 case UNGE:
8519 case UNLT:
8520 case UNGT:
8521 case UNLE:
8522 scc_insn:
8523 /* SCC insns. In the case where the comparison has already been
8524 performed, then they cost 2 instructions. Otherwise they need
8525 an additional comparison before them. */
8526 *total = COSTS_N_INSNS (2);
8527 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8529 return true;
8532 /* Fall through */
8533 case COMPARE:
8534 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8536 *total = 0;
8537 return true;
8540 *total += COSTS_N_INSNS (1);
8541 if (CONST_INT_P (XEXP (x, 1))
8542 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8544 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8545 return true;
8548 subcode = GET_CODE (XEXP (x, 0));
8549 if (subcode == ASHIFT || subcode == ASHIFTRT
8550 || subcode == LSHIFTRT
8551 || subcode == ROTATE || subcode == ROTATERT)
8553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8555 return true;
8558 if (subcode == MULT
8559 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8561 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8562 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8563 return true;
8566 return false;
8568 case UMIN:
8569 case UMAX:
8570 case SMIN:
8571 case SMAX:
8572 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8573 if (!CONST_INT_P (XEXP (x, 1))
8574 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8575 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8576 return true;
8578 case ABS:
8579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8581 if (TARGET_HARD_FLOAT
8582 && (mode == SFmode
8583 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8585 *total = COSTS_N_INSNS (1);
8586 return false;
8588 *total = COSTS_N_INSNS (20);
8589 return false;
8591 *total = COSTS_N_INSNS (1);
8592 if (mode == DImode)
8593 *total += COSTS_N_INSNS (3);
8594 return false;
8596 case SIGN_EXTEND:
8597 case ZERO_EXTEND:
8598 *total = 0;
8599 if (GET_MODE_CLASS (mode) == MODE_INT)
8601 rtx op = XEXP (x, 0);
8602 enum machine_mode opmode = GET_MODE (op);
8604 if (mode == DImode)
8605 *total += COSTS_N_INSNS (1);
8607 if (opmode != SImode)
8609 if (MEM_P (op))
8611 /* If !arm_arch4, we use one of the extendhisi2_mem
8612 or movhi_bytes patterns for HImode. For a QImode
8613 sign extension, we first zero-extend from memory
8614 and then perform a shift sequence. */
8615 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8616 *total += COSTS_N_INSNS (2);
8618 else if (arm_arch6)
8619 *total += COSTS_N_INSNS (1);
8621 /* We don't have the necessary insn, so we need to perform some
8622 other operation. */
8623 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8624 /* An and with constant 255. */
8625 *total += COSTS_N_INSNS (1);
8626 else
8627 /* A shift sequence. Increase costs slightly to avoid
8628 combining two shifts into an extend operation. */
8629 *total += COSTS_N_INSNS (2) + 1;
8632 return false;
8635 switch (GET_MODE (XEXP (x, 0)))
8637 case V8QImode:
8638 case V4HImode:
8639 case V2SImode:
8640 case V4QImode:
8641 case V2HImode:
8642 *total = COSTS_N_INSNS (1);
8643 return false;
8645 default:
8646 gcc_unreachable ();
8648 gcc_unreachable ();
8650 case ZERO_EXTRACT:
8651 case SIGN_EXTRACT:
8652 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8653 return true;
8655 case CONST_INT:
8656 if (const_ok_for_arm (INTVAL (x))
8657 || const_ok_for_arm (~INTVAL (x)))
8658 *total = COSTS_N_INSNS (1);
8659 else
8660 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8661 INTVAL (x), NULL_RTX,
8662 NULL_RTX, 0, 0));
8663 return true;
8665 case CONST:
8666 case LABEL_REF:
8667 case SYMBOL_REF:
8668 *total = COSTS_N_INSNS (3);
8669 return true;
8671 case HIGH:
8672 *total = COSTS_N_INSNS (1);
8673 return true;
8675 case LO_SUM:
8676 *total = COSTS_N_INSNS (1);
8677 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8678 return true;
8680 case CONST_DOUBLE:
8681 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8682 && (mode == SFmode || !TARGET_VFP_SINGLE))
8683 *total = COSTS_N_INSNS (1);
8684 else
8685 *total = COSTS_N_INSNS (4);
8686 return true;
8688 case SET:
8689 /* The vec_extract patterns accept memory operands that require an
8690 address reload. Account for the cost of that reload to give the
8691 auto-inc-dec pass an incentive to try to replace them. */
8692 if (TARGET_NEON && MEM_P (SET_DEST (x))
8693 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8695 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8696 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8697 *total += COSTS_N_INSNS (1);
8698 return true;
8700 /* Likewise for the vec_set patterns. */
8701 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8702 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8703 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8705 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8706 *total = rtx_cost (mem, code, 0, speed);
8707 if (!neon_vector_mem_operand (mem, 2, true))
8708 *total += COSTS_N_INSNS (1);
8709 return true;
8711 return false;
8713 case UNSPEC:
8714 /* We cost this the same as our memory cost so that it can be
8715 hoisted out of loops. */
8716 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8718 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8720 return true;
8722 case CONST_VECTOR:
8723 if (TARGET_NEON
8724 && TARGET_HARD_FLOAT
8725 && outer == SET
8726 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8727 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8728 *total = COSTS_N_INSNS (1);
8729 else
8730 *total = COSTS_N_INSNS (4);
8731 return true;
8733 default:
8734 *total = COSTS_N_INSNS (4);
8735 return false;
8739 /* Estimates the size cost of thumb1 instructions.
8740 For now most of the code is copied from thumb1_rtx_costs. We need more
8741 fine-grained tuning when we have more related test cases. */
8742 static inline int
8743 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8745 enum machine_mode mode = GET_MODE (x);
8746 int words;
8748 switch (code)
8750 case ASHIFT:
8751 case ASHIFTRT:
8752 case LSHIFTRT:
8753 case ROTATERT:
8754 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8756 case PLUS:
8757 case MINUS:
8758 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8759 defined by RTL expansion, especially for the expansion of
8760 multiplication. */
8761 if ((GET_CODE (XEXP (x, 0)) == MULT
8762 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8763 || (GET_CODE (XEXP (x, 1)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8765 return COSTS_N_INSNS (2);
8766 /* Fall through on purpose for normal RTX. */
8767 case COMPARE:
8768 case NEG:
8769 case NOT:
8770 return COSTS_N_INSNS (1);
8772 case MULT:
8773 if (CONST_INT_P (XEXP (x, 1)))
8775 /* The Thumb-1 mul instruction can't operate on a constant; we must
8776 load it into a register first. */
8777 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8778 return COSTS_N_INSNS (1) + const_size;
8780 return COSTS_N_INSNS (1);
8782 case SET:
8783 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8784 the mode. */
8785 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8786 return (COSTS_N_INSNS (words)
8787 + 4 * ((MEM_P (SET_SRC (x)))
8788 + MEM_P (SET_DEST (x))));
8790 case CONST_INT:
8791 if (outer == SET)
8793 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8794 return COSTS_N_INSNS (1);
8795 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8796 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8797 return COSTS_N_INSNS (2);
8798 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8799 if (thumb_shiftable_const (INTVAL (x)))
8800 return COSTS_N_INSNS (2);
8801 return COSTS_N_INSNS (3);
8803 else if ((outer == PLUS || outer == COMPARE)
8804 && INTVAL (x) < 256 && INTVAL (x) > -256)
8805 return 0;
8806 else if ((outer == IOR || outer == XOR || outer == AND)
8807 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8808 return COSTS_N_INSNS (1);
8809 else if (outer == AND)
8811 int i;
8812 /* This duplicates the tests in the andsi3 expander. */
8813 for (i = 9; i <= 31; i++)
8814 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8815 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8816 return COSTS_N_INSNS (2);
8818 else if (outer == ASHIFT || outer == ASHIFTRT
8819 || outer == LSHIFTRT)
8820 return 0;
8821 return COSTS_N_INSNS (2);
8823 case CONST:
8824 case CONST_DOUBLE:
8825 case LABEL_REF:
8826 case SYMBOL_REF:
8827 return COSTS_N_INSNS (3);
8829 case UDIV:
8830 case UMOD:
8831 case DIV:
8832 case MOD:
8833 return 100;
8835 case TRUNCATE:
8836 return 99;
8838 case AND:
8839 case XOR:
8840 case IOR:
8841 /* XXX guess. */
8842 return 8;
8844 case MEM:
8845 /* XXX another guess. */
8846 /* Memory costs quite a lot for the first word, but subsequent words
8847 load at the equivalent of a single insn each. */
8848 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8849 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8850 ? 4 : 0));
8852 case IF_THEN_ELSE:
8853 /* XXX a guess. */
8854 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8855 return 14;
8856 return 2;
8858 case ZERO_EXTEND:
8859 /* XXX still guessing. */
8860 switch (GET_MODE (XEXP (x, 0)))
8862 case QImode:
8863 return (1 + (mode == DImode ? 4 : 0)
8864 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8866 case HImode:
8867 return (4 + (mode == DImode ? 4 : 0)
8868 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8870 case SImode:
8871 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8873 default:
8874 return 99;
8877 default:
8878 return 99;
8882 /* RTX costs when optimizing for size. */
8883 static bool
8884 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8885 int *total)
8887 enum machine_mode mode = GET_MODE (x);
8888 if (TARGET_THUMB1)
8890 *total = thumb1_size_rtx_costs (x, code, outer_code);
8891 return true;
8894 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8895 switch (code)
8897 case MEM:
8898 /* A memory access costs 1 insn if the mode is small, or the address is
8899 a single register, otherwise it costs one insn per word. */
8900 if (REG_P (XEXP (x, 0)))
8901 *total = COSTS_N_INSNS (1);
8902 else if (flag_pic
8903 && GET_CODE (XEXP (x, 0)) == PLUS
8904 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8905 /* This will be split into two instructions.
8906 See arm.md:calculate_pic_address. */
8907 *total = COSTS_N_INSNS (2);
8908 else
8909 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8910 return true;
8912 case DIV:
8913 case MOD:
8914 case UDIV:
8915 case UMOD:
8916 /* Needs a libcall, so it costs about this. */
8917 *total = COSTS_N_INSNS (2);
8918 return false;
8920 case ROTATE:
8921 if (mode == SImode && REG_P (XEXP (x, 1)))
8923 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8924 return true;
8926 /* Fall through */
8927 case ROTATERT:
8928 case ASHIFT:
8929 case LSHIFTRT:
8930 case ASHIFTRT:
8931 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8933 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8934 return true;
8936 else if (mode == SImode)
8938 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8939 /* Slightly disparage register shifts, but not by much. */
8940 if (!CONST_INT_P (XEXP (x, 1)))
8941 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8942 return true;
8945 /* Needs a libcall. */
8946 *total = COSTS_N_INSNS (2);
8947 return false;
8949 case MINUS:
8950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8951 && (mode == SFmode || !TARGET_VFP_SINGLE))
8953 *total = COSTS_N_INSNS (1);
8954 return false;
8957 if (mode == SImode)
8959 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8960 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8962 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8963 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8964 || subcode1 == ROTATE || subcode1 == ROTATERT
8965 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8966 || subcode1 == ASHIFTRT)
8968 /* It's just the cost of the two operands. */
8969 *total = 0;
8970 return false;
8973 *total = COSTS_N_INSNS (1);
8974 return false;
8977 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8978 return false;
8980 case PLUS:
8981 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8982 && (mode == SFmode || !TARGET_VFP_SINGLE))
8984 *total = COSTS_N_INSNS (1);
8985 return false;
8988 /* A shift as a part of ADD costs nothing. */
8989 if (GET_CODE (XEXP (x, 0)) == MULT
8990 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8992 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8994 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8995 return true;
8998 /* Fall through */
8999 case AND: case XOR: case IOR:
9000 if (mode == SImode)
9002 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9004 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9005 || subcode == LSHIFTRT || subcode == ASHIFTRT
9006 || (code == AND && subcode == NOT))
9008 /* It's just the cost of the two operands. */
9009 *total = 0;
9010 return false;
9014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9015 return false;
9017 case MULT:
9018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9019 return false;
9021 case NEG:
9022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9023 && (mode == SFmode || !TARGET_VFP_SINGLE))
9025 *total = COSTS_N_INSNS (1);
9026 return false;
9029 /* Fall through */
9030 case NOT:
9031 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9033 return false;
9035 case IF_THEN_ELSE:
9036 *total = 0;
9037 return false;
9039 case COMPARE:
9040 if (cc_register (XEXP (x, 0), VOIDmode))
9041 *total = 0;
9042 else
9043 *total = COSTS_N_INSNS (1);
9044 return false;
9046 case ABS:
9047 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9048 && (mode == SFmode || !TARGET_VFP_SINGLE))
9049 *total = COSTS_N_INSNS (1);
9050 else
9051 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9052 return false;
9054 case SIGN_EXTEND:
9055 case ZERO_EXTEND:
9056 return arm_rtx_costs_1 (x, outer_code, total, 0);
9058 case CONST_INT:
9059 if (const_ok_for_arm (INTVAL (x)))
9060 /* A multiplication by a constant requires another instruction
9061 to load the constant to a register. */
9062 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9063 ? 1 : 0);
9064 else if (const_ok_for_arm (~INTVAL (x)))
9065 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9066 else if (const_ok_for_arm (-INTVAL (x)))
9068 if (outer_code == COMPARE || outer_code == PLUS
9069 || outer_code == MINUS)
9070 *total = 0;
9071 else
9072 *total = COSTS_N_INSNS (1);
9074 else
9075 *total = COSTS_N_INSNS (2);
9076 return true;
9078 case CONST:
9079 case LABEL_REF:
9080 case SYMBOL_REF:
9081 *total = COSTS_N_INSNS (2);
9082 return true;
9084 case CONST_DOUBLE:
9085 *total = COSTS_N_INSNS (4);
9086 return true;
9088 case CONST_VECTOR:
9089 if (TARGET_NEON
9090 && TARGET_HARD_FLOAT
9091 && outer_code == SET
9092 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9093 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9094 *total = COSTS_N_INSNS (1);
9095 else
9096 *total = COSTS_N_INSNS (4);
9097 return true;
9099 case HIGH:
9100 case LO_SUM:
9101 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9102 cost of these slightly. */
9103 *total = COSTS_N_INSNS (1) + 1;
9104 return true;
9106 case SET:
9107 return false;
9109 default:
9110 if (mode != VOIDmode)
9111 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9112 else
9113 *total = COSTS_N_INSNS (4); /* Who knows? */
9114 return false;
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand holding the shift amount.
9121 Return NULL if OP is not a shifter operand. */
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9125 enum rtx_code code = GET_CODE (op);
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9140 return NULL;
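/* Illustrative examples (hypothetical operands): on (mult (reg A)
   (const_int 8)) this returns A and leaves *SHIFT_REG untouched, since 8
   is an exact power of two; on (ashift (reg A) (reg B)) it returns A and
   sets *SHIFT_REG to B; on anything else, e.g. (plus ...), it returns
   NULL.  */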
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 gcc_assert (GET_CODE (x) == UNSPEC);
9149 switch (XINT (x, 1))
9151 case UNSPEC_UNALIGNED_LOAD:
9152 /* We can only do unaligned loads into the integer unit, and we can't
9153 use LDM or LDRD. */
9154 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9155 if (speed_p)
9156 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9157 + extra_cost->ldst.load_unaligned);
9159 #ifdef NOT_YET
9160 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9161 ADDR_SPACE_GENERIC, speed_p);
9162 #endif
9163 return true;
9165 case UNSPEC_UNALIGNED_STORE:
9166 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9167 if (speed_p)
9168 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9169 + extra_cost->ldst.store_unaligned);
9171 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9172 #ifdef NOT_YET
9173 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9174 ADDR_SPACE_GENERIC, speed_p);
9175 #endif
9176 return true;
9178 case UNSPEC_VRINTZ:
9179 case UNSPEC_VRINTP:
9180 case UNSPEC_VRINTM:
9181 case UNSPEC_VRINTR:
9182 case UNSPEC_VRINTX:
9183 case UNSPEC_VRINTA:
9184 *cost = COSTS_N_INSNS (1);
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9193 return false;
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
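/* For illustration: LIBCALL_COST (2) expands (using the enclosing
   function's speed_p) to COSTS_N_INSNS (20) when costing for speed and
   COSTS_N_INSNS (4) at -Os, matching the description above of one insn
   per argument plus the call and the result.  */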
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9207 if (shift_reg) \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9213 else if (speed_p) \
9214 *cost += extra_cost->alu.arith_shift; \
9216 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9217 + rtx_cost (XEXP (x, 1 - IDX), \
9218 OP, 1, speed_p)); \
9219 return true; \
9222 while (0);
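/* Illustrative sketch (hypothetical invocation): for a narrow-mode
   x == (plus (ashift (reg A) (const_int 2)) (reg B)),
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) finds shift_op == A with no
   register-held shift amount, so (assuming arm_rtx_shift_left_p accepts
   the left shift) it adds extra_cost->alu.arith_shift when costing for
   speed, plus the costs of A and B, and returns true.  */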
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained with an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9230 costs.
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9237 static bool
9238 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9239 const struct cpu_cost_table *extra_cost,
9240 int *cost, bool speed_p)
9242 enum machine_mode mode = GET_MODE (x);
9244 if (TARGET_THUMB1)
9246 if (speed_p)
9247 *cost = thumb1_rtx_costs (x, code, outer_code);
9248 else
9249 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9250 return true;
9253 switch (code)
9255 case SET:
9256 *cost = 0;
9257 /* SET RTXs don't have a mode so we get it from the destination. */
9258 mode = GET_MODE (SET_DEST (x));
9260 if (REG_P (SET_SRC (x))
9261 && REG_P (SET_DEST (x)))
9263 /* Assume that most copies can be done with a single insn,
9264 unless we don't have HW FP, in which case everything
9265 larger than word mode will require two insns. */
9266 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9267 && GET_MODE_SIZE (mode) > 4)
9268 || mode == DImode)
9269 ? 2 : 1);
9270 /* Conditional register moves can be encoded
9271 in 16 bits in Thumb mode. */
9272 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9273 *cost >>= 1;
9275 return true;
9278 if (CONST_INT_P (SET_SRC (x)))
9280 /* Handle CONST_INT here, since the value doesn't have a mode
9281 and we would otherwise be unable to work out the true cost. */
9282 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9283 outer_code = SET;
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x))
9287 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9288 *cost -= 1;
9289 x = SET_SRC (x);
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9293 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9294 *cost >>= 1;
9295 goto const_int_cost;
9298 return false;
9300 case MEM:
9301 /* A memory access costs 1 insn if the mode is small, or the address is
9302 a single register, otherwise it costs one insn per word. */
9303 if (REG_P (XEXP (x, 0)))
9304 *cost = COSTS_N_INSNS (1);
9305 else if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost = COSTS_N_INSNS (2);
9311 else
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9316 if (speed_p)
9317 #ifdef NOT_YET
9318 *cost += (extra_cost->ldst.load
9319 + arm_address_cost (XEXP (x, 0), mode,
9320 ADDR_SPACE_GENERIC, speed_p));
9321 #else
9322 *cost += extra_cost->ldst.load;
9323 #endif
9324 return true;
9326 case PARALLEL:
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
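/* Worked example (the per-insn figures are hypothetical; real values come
   from the per-CPU cost tables): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, an 8-register LDM is costed as
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
	  == ldm_1st + COSTS_N_INSNS (3).  */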
9341 bool is_ldm = load_multiple_operation (x, SImode);
9342 bool is_stm = store_multiple_operation (x, SImode);
9344 *cost = COSTS_N_INSNS (1);
9346 if (is_ldm || is_stm)
9348 if (speed_p)
9350 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9351 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9352 ? extra_cost->ldst.ldm_regs_per_insn_1st
9353 : extra_cost->ldst.stm_regs_per_insn_1st;
9354 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9355 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9356 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9358 *cost += regs_per_insn_1st
9359 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9360 + regs_per_insn_sub - 1)
9361 / regs_per_insn_sub);
9362 return true;
9366 return false;
9368 case DIV:
9369 case UDIV:
9370 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9371 && (mode == SFmode || !TARGET_VFP_SINGLE))
9372 *cost = COSTS_N_INSNS (speed_p
9373 ? extra_cost->fp[mode != SFmode].div : 1);
9374 else if (mode == SImode && TARGET_IDIV)
9375 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9376 else
9377 *cost = LIBCALL_COST (2);
9378 return false; /* All arguments must be in registers. */
9380 case MOD:
9381 case UMOD:
9382 *cost = LIBCALL_COST (2);
9383 return false; /* All arguments must be in registers. */
9385 case ROTATE:
9386 if (mode == SImode && REG_P (XEXP (x, 1)))
9388 *cost = (COSTS_N_INSNS (2)
9389 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9390 if (speed_p)
9391 *cost += extra_cost->alu.shift_reg;
9392 return true;
9394 /* Fall through */
9395 case ROTATERT:
9396 case ASHIFT:
9397 case LSHIFTRT:
9398 case ASHIFTRT:
9399 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9401 *cost = (COSTS_N_INSNS (3)
9402 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9403 if (speed_p)
9404 *cost += 2 * extra_cost->alu.shift;
9405 return true;
9407 else if (mode == SImode)
9409 *cost = (COSTS_N_INSNS (1)
9410 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9411 /* Slightly disparage register shifts at -Os, but not by much. */
9412 if (!CONST_INT_P (XEXP (x, 1)))
9413 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9414 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9415 return true;
9417 else if (GET_MODE_CLASS (mode) == MODE_INT
9418 && GET_MODE_SIZE (mode) < 4)
9420 if (code == ASHIFT)
9422 *cost = (COSTS_N_INSNS (1)
9423 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9424 /* Slightly disparage register shifts at -Os, but not by
9425 much. */
9426 if (!CONST_INT_P (XEXP (x, 1)))
9427 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9428 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9430 else if (code == LSHIFTRT || code == ASHIFTRT)
9432 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9434 /* Can use SBFX/UBFX. */
9435 *cost = COSTS_N_INSNS (1);
9436 if (speed_p)
9437 *cost += extra_cost->alu.bfx;
9438 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9440 else
9442 *cost = COSTS_N_INSNS (2);
9443 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9444 if (speed_p)
9446 if (CONST_INT_P (XEXP (x, 1)))
9447 *cost += 2 * extra_cost->alu.shift;
9448 else
9449 *cost += (extra_cost->alu.shift
9450 + extra_cost->alu.shift_reg);
9452 else
9453 /* Slightly disparage register shifts. */
9454 *cost += !CONST_INT_P (XEXP (x, 1));
9457 else /* Rotates. */
9459 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9460 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9461 if (speed_p)
9463 if (CONST_INT_P (XEXP (x, 1)))
9464 *cost += (2 * extra_cost->alu.shift
9465 + extra_cost->alu.log_shift);
9466 else
9467 *cost += (extra_cost->alu.shift
9468 + extra_cost->alu.shift_reg
9469 + extra_cost->alu.log_shift_reg);
9472 return true;
9475 *cost = LIBCALL_COST (2);
9476 return false;
9478 case BSWAP:
9479 if (arm_arch6)
9481 if (mode == SImode)
9483 *cost = COSTS_N_INSNS (1);
9484 if (speed_p)
9485 *cost += extra_cost->alu.rev;
9487 return false;
9490 else
9492 /* No rev instruction available. Look at arm_legacy_rev
9493 and thumb_legacy_rev for the form of RTL used then. */
9494 if (TARGET_THUMB)
9496 *cost = COSTS_N_INSNS (10);
9498 if (speed_p)
9500 *cost += 6 * extra_cost->alu.shift;
9501 *cost += 3 * extra_cost->alu.logical;
9504 else
9506 *cost = COSTS_N_INSNS (5);
9508 if (speed_p)
9510 *cost += 2 * extra_cost->alu.shift;
9511 *cost += extra_cost->alu.arith_shift;
9512 *cost += 2 * extra_cost->alu.logical;
9515 return true;
9517 return false;
9519 case MINUS:
9520 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9521 && (mode == SFmode || !TARGET_VFP_SINGLE))
9523 *cost = COSTS_N_INSNS (1);
9524 if (GET_CODE (XEXP (x, 0)) == MULT
9525 || GET_CODE (XEXP (x, 1)) == MULT)
9527 rtx mul_op0, mul_op1, sub_op;
9529 if (speed_p)
9530 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9532 if (GET_CODE (XEXP (x, 0)) == MULT)
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 sub_op = XEXP (x, 1);
9538 else
9540 mul_op0 = XEXP (XEXP (x, 1), 0);
9541 mul_op1 = XEXP (XEXP (x, 1), 1);
9542 sub_op = XEXP (x, 0);
9545 /* The first operand of the multiply may be optionally
9546 negated. */
9547 if (GET_CODE (mul_op0) == NEG)
9548 mul_op0 = XEXP (mul_op0, 0);
9550 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9551 + rtx_cost (mul_op1, code, 0, speed_p)
9552 + rtx_cost (sub_op, code, 0, speed_p));
9554 return true;
9557 if (speed_p)
9558 *cost += extra_cost->fp[mode != SFmode].addsub;
9559 return false;
9562 if (mode == SImode)
9564 rtx shift_by_reg = NULL;
9565 rtx shift_op;
9566 rtx non_shift_op;
9568 *cost = COSTS_N_INSNS (1);
9570 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9571 if (shift_op == NULL)
9573 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9574 non_shift_op = XEXP (x, 0);
9576 else
9577 non_shift_op = XEXP (x, 1);
9579 if (shift_op != NULL)
9581 if (shift_by_reg != NULL)
9583 if (speed_p)
9584 *cost += extra_cost->alu.arith_shift_reg;
9585 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9587 else if (speed_p)
9588 *cost += extra_cost->alu.arith_shift;
9590 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9591 + rtx_cost (non_shift_op, code, 0, speed_p));
9592 return true;
9595 if (arm_arch_thumb2
9596 && GET_CODE (XEXP (x, 1)) == MULT)
9598 /* MLS. */
9599 if (speed_p)
9600 *cost += extra_cost->mult[0].add;
9601 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9602 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9603 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9604 return true;
9607 if (CONST_INT_P (XEXP (x, 0)))
9609 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9610 INTVAL (XEXP (x, 0)), NULL_RTX,
9611 NULL_RTX, 1, 0);
9612 *cost = COSTS_N_INSNS (insns);
9613 if (speed_p)
9614 *cost += insns * extra_cost->alu.arith;
9615 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9616 return true;
9619 return false;
9622 if (GET_MODE_CLASS (mode) == MODE_INT
9623 && GET_MODE_SIZE (mode) < 4)
9625 rtx shift_op, shift_reg;
9626 shift_reg = NULL;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost = 1 + COSTS_N_INSNS (1);
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 if (CONST_INT_P (XEXP (x, 0)))
9641 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9642 return true;
9645 return false;
9648 if (mode == DImode)
9650 *cost = COSTS_N_INSNS (2);
9652 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9654 rtx op1 = XEXP (x, 1);
9656 if (speed_p)
9657 *cost += 2 * extra_cost->alu.arith;
9659 if (GET_CODE (op1) == ZERO_EXTEND)
9660 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9661 else
9662 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9663 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9664 0, speed_p);
9665 return true;
9667 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9669 if (speed_p)
9670 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9671 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9672 0, speed_p)
9673 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9674 return true;
9676 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9677 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9679 if (speed_p)
9680 *cost += (extra_cost->alu.arith
9681 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9682 ? extra_cost->alu.arith
9683 : extra_cost->alu.arith_shift));
9684 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9685 + rtx_cost (XEXP (XEXP (x, 1), 0),
9686 GET_CODE (XEXP (x, 1)), 0, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += 2 * extra_cost->alu.arith;
9692 return false;
9695 /* Vector mode? */
9697 *cost = LIBCALL_COST (2);
9698 return false;
9700 case PLUS:
9701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9702 && (mode == SFmode || !TARGET_VFP_SINGLE))
9704 *cost = COSTS_N_INSNS (1);
9705 if (GET_CODE (XEXP (x, 0)) == MULT)
9707 rtx mul_op0, mul_op1, add_op;
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9712 mul_op0 = XEXP (XEXP (x, 0), 0);
9713 mul_op1 = XEXP (XEXP (x, 0), 1);
9714 add_op = XEXP (x, 1);
9716 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9717 + rtx_cost (mul_op1, code, 0, speed_p)
9718 + rtx_cost (add_op, code, 0, speed_p));
9720 return true;
9723 if (speed_p)
9724 *cost += extra_cost->fp[mode != SFmode].addsub;
9725 return false;
9727 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9729 *cost = LIBCALL_COST (2);
9730 return false;
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9736 narrow modes. */
9737 if (GET_MODE_CLASS (mode) == MODE_INT
9738 && GET_MODE_SIZE (mode) < 4)
9740 rtx shift_op, shift_reg;
9741 shift_reg = NULL;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9745 if (CONST_INT_P (XEXP (x, 1)))
9747 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 1)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 /* Slightly penalize a narrow operation as the result may
9754 need widening. */
9755 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9756 return true;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost = 1 + COSTS_N_INSNS (1);
9762 if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (mode == SImode)
9770 rtx shift_op, shift_reg;
9772 *cost = COSTS_N_INSNS (1);
9773 if (TARGET_INT_SIMD
9774 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9777 /* UXTA[BH] or SXTA[BH]. */
9778 if (speed_p)
9779 *cost += extra_cost->alu.extend_arith;
9780 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9781 speed_p)
9782 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9783 return true;
9786 shift_reg = NULL;
9787 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9788 if (shift_op != NULL)
9790 if (shift_reg)
9792 if (speed_p)
9793 *cost += extra_cost->alu.arith_shift_reg;
9794 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9796 else if (speed_p)
9797 *cost += extra_cost->alu.arith_shift;
9799 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9800 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9801 return true;
9803 if (GET_CODE (XEXP (x, 0)) == MULT)
9805 rtx mul_op = XEXP (x, 0);
9807 *cost = COSTS_N_INSNS (1);
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9832 return true;
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9840 return true;
9842 if (CONST_INT_P (XEXP (x, 1)))
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9851 return true;
9853 return false;
9856 if (mode == DImode)
9858 if (arm_arch3m
9859 && GET_CODE (XEXP (x, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9862 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9865 *cost = COSTS_N_INSNS (1);
9866 if (speed_p)
9867 *cost += extra_cost->mult[1].extend_add;
9868 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9869 ZERO_EXTEND, 0, speed_p)
9870 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9873 return true;
9876 *cost = COSTS_N_INSNS (2);
9878 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9879 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9881 if (speed_p)
9882 *cost += (extra_cost->alu.arith
9883 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9884 ? extra_cost->alu.arith
9885 : extra_cost->alu.arith_shift));
9887 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9888 speed_p)
9889 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9890 return true;
9893 if (speed_p)
9894 *cost += 2 * extra_cost->alu.arith;
9895 return false;
9898 /* Vector mode? */
9899 *cost = LIBCALL_COST (2);
9900 return false;
9901 case IOR:
9902 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9904 *cost = COSTS_N_INSNS (1);
9905 if (speed_p)
9906 *cost += extra_cost->alu.rev;
9908 return true;
9910 /* Fall through. */
9911 case AND: case XOR:
9912 if (mode == SImode)
9914 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9915 rtx op0 = XEXP (x, 0);
9916 rtx shift_op, shift_reg;
9918 *cost = COSTS_N_INSNS (1);
9920 if (subcode == NOT
9921 && (code == AND
9922 || (code == IOR && TARGET_THUMB2)))
9923 op0 = XEXP (op0, 0);
9925 shift_reg = NULL;
9926 shift_op = shifter_op_p (op0, &shift_reg);
9927 if (shift_op != NULL)
9929 if (shift_reg)
9931 if (speed_p)
9932 *cost += extra_cost->alu.log_shift_reg;
9933 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9935 else if (speed_p)
9936 *cost += extra_cost->alu.log_shift;
9938 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9939 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9940 return true;
9943 if (CONST_INT_P (XEXP (x, 1)))
9945 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9946 INTVAL (XEXP (x, 1)), NULL_RTX,
9947 NULL_RTX, 1, 0);
9949 *cost = COSTS_N_INSNS (insns);
9950 if (speed_p)
9951 *cost += insns * extra_cost->alu.logical;
9952 *cost += rtx_cost (op0, code, 0, speed_p);
9953 return true;
9956 if (speed_p)
9957 *cost += extra_cost->alu.logical;
9958 *cost += (rtx_cost (op0, code, 0, speed_p)
9959 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9960 return true;
9963 if (mode == DImode)
9965 rtx op0 = XEXP (x, 0);
9966 enum rtx_code subcode = GET_CODE (op0);
9968 *cost = COSTS_N_INSNS (2);
9970 if (subcode == NOT
9971 && (code == AND
9972 || (code == IOR && TARGET_THUMB2)))
9973 op0 = XEXP (op0, 0);
9975 if (GET_CODE (op0) == ZERO_EXTEND)
9977 if (speed_p)
9978 *cost += 2 * extra_cost->alu.logical;
9980 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9982 return true;
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9989 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9990 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9991 return true;
9994 if (speed_p)
9995 *cost += 2 * extra_cost->alu.logical;
9997 return true;
9999 /* Vector mode? */
10001 *cost = LIBCALL_COST (2);
10002 return false;
10004 case MULT:
10005 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10006 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 rtx op0 = XEXP (x, 0);
10010 *cost = COSTS_N_INSNS (1);
10012 if (GET_CODE (op0) == NEG)
10013 op0 = XEXP (op0, 0);
10015 if (speed_p)
10016 *cost += extra_cost->fp[mode != SFmode].mult;
10018 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10020 return true;
10022 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10024 *cost = LIBCALL_COST (2);
10025 return false;
10028 if (mode == SImode)
10030 *cost = COSTS_N_INSNS (1);
10031 if (TARGET_DSP_MULTIPLY
10032 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10033 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10034 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10035 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10036 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10037 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10039 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10040 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10041 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10042 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10043 && (INTVAL (XEXP (XEXP (x, 1), 1))
10044 == 16))))))
10046 /* SMUL[TB][TB]. */
10047 if (speed_p)
10048 *cost += extra_cost->mult[0].extend;
10049 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10050 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10051 return true;
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10058 if (mode == DImode)
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10066 *cost = COSTS_N_INSNS (1);
10067 if (speed_p)
10068 *cost += extra_cost->mult[1].extend;
10069 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10070 ZERO_EXTEND, 0, speed_p)
10071 + rtx_cost (XEXP (XEXP (x, 1), 0),
10072 ZERO_EXTEND, 0, speed_p));
10073 return true;
10076 *cost = LIBCALL_COST (2);
10077 return false;
10080 /* Vector mode? */
10081 *cost = LIBCALL_COST (2);
10082 return false;
10084 case NEG:
10085 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10086 && (mode == SFmode || !TARGET_VFP_SINGLE))
10088 *cost = COSTS_N_INSNS (1);
10089 if (speed_p)
10090 *cost += extra_cost->fp[mode != SFmode].neg;
10092 return false;
10094 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10096 *cost = LIBCALL_COST (1);
10097 return false;
10100 if (mode == SImode)
10102 if (GET_CODE (XEXP (x, 0)) == ABS)
10104 *cost = COSTS_N_INSNS (2);
10105 /* Assume the non-flag-changing variant. */
10106 if (speed_p)
10107 *cost += (extra_cost->alu.log_shift
10108 + extra_cost->alu.arith_shift);
10109 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10110 return true;
10113 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10114 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10116 *cost = COSTS_N_INSNS (2);
10117 /* No extra cost for MOV imm and MVN imm. */
10118 /* If the comparison op is using the flags, there's no further
10119 cost, otherwise we need to add the cost of the comparison. */
10120 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10121 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10122 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10124 *cost += (COSTS_N_INSNS (1)
10125 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10126 speed_p)
10127 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10128 speed_p));
10129 if (speed_p)
10130 *cost += extra_cost->alu.arith;
10132 return true;
10134 *cost = COSTS_N_INSNS (1);
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 return false;
10140 if (GET_MODE_CLASS (mode) == MODE_INT
10141 && GET_MODE_SIZE (mode) < 4)
10143 /* Slightly disparage, as we might need an extend operation. */
10144 *cost = 1 + COSTS_N_INSNS (1);
10145 if (speed_p)
10146 *cost += extra_cost->alu.arith;
10147 return false;
10150 if (mode == DImode)
10152 *cost = COSTS_N_INSNS (2);
10153 if (speed_p)
10154 *cost += 2 * extra_cost->alu.arith;
10155 return false;
10158 /* Vector mode? */
10159 *cost = LIBCALL_COST (1);
10160 return false;
10162 case NOT:
10163 if (mode == SImode)
10165 rtx shift_op;
10166 rtx shift_reg = NULL;
10168 *cost = COSTS_N_INSNS (1);
10169 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10171 if (shift_op)
10173 if (shift_reg != NULL)
10175 if (speed_p)
10176 *cost += extra_cost->alu.log_shift_reg;
10177 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10179 else if (speed_p)
10180 *cost += extra_cost->alu.log_shift;
10181 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10182 return true;
10185 if (speed_p)
10186 *cost += extra_cost->alu.logical;
10187 return false;
10189 if (mode == DImode)
10191 *cost = COSTS_N_INSNS (2);
10192 return false;
10195 /* Vector mode? */
10197 *cost += LIBCALL_COST (1);
10198 return false;
10200 case IF_THEN_ELSE:
10202 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10204 *cost = COSTS_N_INSNS (4);
10205 return true;
10207 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10208 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10210 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10211 /* Assume that if one arm of the if_then_else is a register,
10212 that it will be tied with the result and eliminate the
10213 conditional insn. */
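/* For example, for (if_then_else (cond) (reg) (const_int 0)) only the
   cost of the constant arm is added below, on the assumption that the
   register arm becomes the destination and needs no separate
   conditional move. */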
10214 if (REG_P (XEXP (x, 1)))
10215 *cost += op2cost;
10216 else if (REG_P (XEXP (x, 2)))
10217 *cost += op1cost;
10218 else
10220 if (speed_p)
10222 if (extra_cost->alu.non_exec_costs_exec)
10223 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10224 else
10225 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10227 else
10228 *cost += op1cost + op2cost;
10231 return true;
10233 case COMPARE:
10234 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10235 *cost = 0;
10236 else
10238 enum machine_mode op0mode;
10239 /* We'll mostly assume that the cost of a compare is the cost of the
10240 LHS. However, there are some notable exceptions. */
10242 /* Floating point compares are never done as side-effects. */
10243 op0mode = GET_MODE (XEXP (x, 0));
10244 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10245 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10247 *cost = COSTS_N_INSNS (1);
10248 if (speed_p)
10249 *cost += extra_cost->fp[op0mode != SFmode].compare;
10251 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10253 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10254 return true;
10257 return false;
10259 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10261 *cost = LIBCALL_COST (2);
10262 return false;
10265 /* DImode compares normally take two insns. */
10266 if (op0mode == DImode)
10268 *cost = COSTS_N_INSNS (2);
10269 if (speed_p)
10270 *cost += 2 * extra_cost->alu.arith;
10271 return false;
10274 if (op0mode == SImode)
10276 rtx shift_op;
10277 rtx shift_reg;
10279 if (XEXP (x, 1) == const0_rtx
10280 && !(REG_P (XEXP (x, 0))
10281 || (GET_CODE (XEXP (x, 0)) == SUBREG
10282 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10284 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10286 /* Multiply operations that set the flags are often
10287 significantly more expensive. */
10288 if (speed_p
10289 && GET_CODE (XEXP (x, 0)) == MULT
10290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10291 *cost += extra_cost->mult[0].flag_setting;
10293 if (speed_p
10294 && GET_CODE (XEXP (x, 0)) == PLUS
10295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10296 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10297 0), 1), mode))
10298 *cost += extra_cost->mult[0].flag_setting;
10299 return true;
10302 shift_reg = NULL;
10303 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10304 if (shift_op != NULL)
10306 *cost = COSTS_N_INSNS (1);
10307 if (shift_reg != NULL)
10309 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10310 if (speed_p)
10311 *cost += extra_cost->alu.arith_shift_reg;
10313 else if (speed_p)
10314 *cost += extra_cost->alu.arith_shift;
10315 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10316 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10317 return true;
10320 *cost = COSTS_N_INSNS (1);
10321 if (speed_p)
10322 *cost += extra_cost->alu.arith;
10323 if (CONST_INT_P (XEXP (x, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10326 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10327 return true;
10329 return false;
10332 /* Vector mode? */
10334 *cost = LIBCALL_COST (2);
10335 return false;
10337 return true;
10339 case EQ:
10340 case NE:
10341 case LT:
10342 case LE:
10343 case GT:
10344 case GE:
10345 case LTU:
10346 case LEU:
10347 case GEU:
10348 case GTU:
10349 case ORDERED:
10350 case UNORDERED:
10351 case UNEQ:
10352 case UNLE:
10353 case UNLT:
10354 case UNGE:
10355 case UNGT:
10356 case LTGT:
10357 if (outer_code == SET)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10361 && XEXP (x, 1) == const0_rtx)
10363 /* Thumb also needs an IT insn. */
10364 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10365 return true;
10367 if (XEXP (x, 1) == const0_rtx)
10369 switch (code)
10371 case LT:
10372 /* LSR Rd, Rn, #31. */
10373 *cost = COSTS_N_INSNS (1);
10374 if (speed_p)
10375 *cost += extra_cost->alu.shift;
10376 break;
10378 case EQ:
10379 /* RSBS T1, Rn, #0
10380 ADC Rd, Rn, T1. */
10382 case NE:
10383 /* SUBS T1, Rn, #1
10384 SBC Rd, Rn, T1. */
10385 *cost = COSTS_N_INSNS (2);
10386 break;
10388 case LE:
10389 /* RSBS T1, Rn, Rn, LSR #31
10390 ADC Rd, Rn, T1. */
10391 *cost = COSTS_N_INSNS (2);
10392 if (speed_p)
10393 *cost += extra_cost->alu.arith_shift;
10394 break;
10396 case GT:
10397 /* RSB Rd, Rn, Rn, ASR #1
10398 LSR Rd, Rd, #31. */
10399 *cost = COSTS_N_INSNS (2);
10400 if (speed_p)
10401 *cost += (extra_cost->alu.arith_shift
10402 + extra_cost->alu.shift);
10403 break;
10405 case GE:
10406 /* ASR Rd, Rn, #31
10407 ADD Rd, Rn, #1. */
10408 *cost = COSTS_N_INSNS (2);
10409 if (speed_p)
10410 *cost += extra_cost->alu.shift;
10411 break;
10413 default:
10414 /* Remaining cases are either meaningless or would take
10415 three insns anyway. */
10416 *cost = COSTS_N_INSNS (3);
10417 break;
10419 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10420 return true;
10422 else
10424 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10425 if (CONST_INT_P (XEXP (x, 1))
10426 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10428 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10429 return true;
10432 return false;
10435 /* Not directly inside a set. If it involves the condition code
10436 register it must be the condition for a branch, cond_exec or
10437 IF_THEN_ELSE operation. Since the comparison is performed elsewhere
10438 this is just the control part which has no additional
10439 cost. */
10440 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10441 && XEXP (x, 1) == const0_rtx)
10443 *cost = 0;
10444 return true;
10446 return false;
10448 case ABS:
10449 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10450 && (mode == SFmode || !TARGET_VFP_SINGLE))
10452 *cost = COSTS_N_INSNS (1);
10453 if (speed_p)
10454 *cost += extra_cost->fp[mode != SFmode].neg;
10456 return false;
10458 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 *cost = LIBCALL_COST (1);
10461 return false;
10464 if (mode == SImode)
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10479 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10481 if (mode == DImode)
10482 *cost += COSTS_N_INSNS (1);
10484 if (!speed_p)
10485 return true;
10487 if (GET_MODE (XEXP (x, 0)) == SImode)
10488 *cost += extra_cost->ldst.load;
10489 else
10490 *cost += extra_cost->ldst.load_sign_extend;
10492 if (mode == DImode)
10493 *cost += extra_cost->alu.shift;
10495 return true;
10498 /* Widening from less than 32-bits requires an extend operation. */
10499 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10501 /* We have SXTB/SXTH. */
10502 *cost = COSTS_N_INSNS (1);
10503 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10504 if (speed_p)
10505 *cost += extra_cost->alu.extend;
10507 else if (GET_MODE (XEXP (x, 0)) != SImode)
10509 /* Needs two shifts. */
10510 *cost = COSTS_N_INSNS (2);
10511 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10512 if (speed_p)
10513 *cost += 2 * extra_cost->alu.shift;
10516 /* Widening beyond 32-bits requires one more insn. */
10517 if (mode == DImode)
10519 *cost += COSTS_N_INSNS (1);
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10524 return true;
10526 case ZERO_EXTEND:
10527 if ((arm_arch4
10528 || GET_MODE (XEXP (x, 0)) == SImode
10529 || GET_MODE (XEXP (x, 0)) == QImode)
10530 && MEM_P (XEXP (x, 0)))
10532 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10534 if (mode == DImode)
10535 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10537 return true;
10540 /* Widening from less than 32-bits requires an extend operation. */
10541 if (GET_MODE (XEXP (x, 0)) == QImode)
10543 /* UXTB can be a shorter instruction in Thumb2, but it might
10544 be slower than the AND Rd, Rn, #255 alternative. When
10545 optimizing for speed it should never be slower to use
10546 AND, and we don't really model 16-bit vs 32-bit insns
10547 here. */
10548 *cost = COSTS_N_INSNS (1);
10549 if (speed_p)
10550 *cost += extra_cost->alu.logical;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10554 /* We have UXTB/UXTH. */
10555 *cost = COSTS_N_INSNS (1);
10556 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10557 if (speed_p)
10558 *cost += extra_cost->alu.extend;
10560 else if (GET_MODE (XEXP (x, 0)) != SImode)
10562 /* Needs two shifts. It's marginally preferable to use
10563 shifts rather than two BIC instructions as the second
10564 shift may merge with a subsequent insn as a shifter
10565 op. */
10566 *cost = COSTS_N_INSNS (2);
10567 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10568 if (speed_p)
10569 *cost += 2 * extra_cost->alu.shift;
10571 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10572 *cost = COSTS_N_INSNS (1);
10574 /* Widening beyond 32-bits requires one more insn. */
10575 if (mode == DImode)
10577 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10580 return true;
10582 case CONST_INT:
10583 *cost = 0;
10584 /* CONST_INT has no mode, so we cannot tell for sure how many
10585 insns are really going to be needed. The best we can do is
10586 look at the value passed. If it fits in SImode, then assume
10587 that's the mode it will be used for. Otherwise assume it
10588 will be used in DImode. */
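/* For example, 0x7fffffff survives truncation to SImode and is costed
   as a single SImode constant, whereas a value with bits above bit 31
   (e.g. (HOST_WIDE_INT) 1 << 32) does not and is costed below as two
   SImode constants, one per 32-bit half. */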
10589 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10590 mode = SImode;
10591 else
10592 mode = DImode;
10594 /* Avoid blowing up in arm_gen_constant (). */
10595 if (!(outer_code == PLUS
10596 || outer_code == AND
10597 || outer_code == IOR
10598 || outer_code == XOR
10599 || outer_code == MINUS))
10600 outer_code = SET;
10602 const_int_cost:
10603 if (mode == SImode)
10605 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10606 INTVAL (x), NULL, NULL,
10607 0, 0));
10608 /* Extra costs? */
10610 else
10612 *cost += COSTS_N_INSNS (arm_gen_constant
10613 (outer_code, SImode, NULL,
10614 trunc_int_for_mode (INTVAL (x), SImode),
10615 NULL, NULL, 0, 0)
10616 + arm_gen_constant (outer_code, SImode, NULL,
10617 INTVAL (x) >> 32, NULL,
10618 NULL, 0, 0));
10619 /* Extra costs? */
10622 return true;
10624 case CONST:
10625 case LABEL_REF:
10626 case SYMBOL_REF:
10627 if (speed_p)
10629 if (arm_arch_thumb2 && !flag_pic)
10630 *cost = COSTS_N_INSNS (2);
10631 else
10632 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10634 else
10635 *cost = COSTS_N_INSNS (2);
10637 if (flag_pic)
10639 *cost += COSTS_N_INSNS (1);
10640 if (speed_p)
10641 *cost += extra_cost->alu.arith;
10644 return true;
10646 case CONST_FIXED:
10647 *cost = COSTS_N_INSNS (4);
10648 /* Fixme. */
10649 return true;
10651 case CONST_DOUBLE:
10652 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10653 && (mode == SFmode || !TARGET_VFP_SINGLE))
10655 if (vfp3_const_double_rtx (x))
10657 *cost = COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->fp[mode == DFmode].fpconst;
10660 return true;
10663 if (speed_p)
10665 *cost = COSTS_N_INSNS (1);
10666 if (mode == DFmode)
10667 *cost += extra_cost->ldst.loadd;
10668 else
10669 *cost += extra_cost->ldst.loadf;
10671 else
10672 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10674 return true;
10676 *cost = COSTS_N_INSNS (4);
10677 return true;
10679 case CONST_VECTOR:
10680 /* Fixme. */
10681 if (TARGET_NEON
10682 && TARGET_HARD_FLOAT
10683 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10684 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10685 *cost = COSTS_N_INSNS (1);
10686 else
10687 *cost = COSTS_N_INSNS (4);
10688 return true;
10690 case HIGH:
10691 case LO_SUM:
10692 *cost = COSTS_N_INSNS (1);
10693 /* When optimizing for size, we prefer constant pool entries to
10694 MOVW/MOVT pairs, so bump the cost of these slightly. */
10695 if (!speed_p)
10696 *cost += 1;
10697 return true;
10699 case CLZ:
10700 *cost = COSTS_N_INSNS (1);
10701 if (speed_p)
10702 *cost += extra_cost->alu.clz;
10703 return false;
10705 case SMIN:
10706 if (XEXP (x, 1) == const0_rtx)
10708 *cost = COSTS_N_INSNS (1);
10709 if (speed_p)
10710 *cost += extra_cost->alu.log_shift;
10711 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10712 return true;
10714 /* Fall through. */
10715 case SMAX:
10716 case UMIN:
10717 case UMAX:
10718 *cost = COSTS_N_INSNS (2);
10719 return false;
10721 case TRUNCATE:
10722 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10723 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10724 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10726 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10727 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10728 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10729 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10730 == ZERO_EXTEND))))
10732 *cost = COSTS_N_INSNS (1);
10733 if (speed_p)
10734 *cost += extra_cost->mult[1].extend;
10735 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10736 speed_p)
10737 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10738 0, speed_p));
10739 return true;
10741 *cost = LIBCALL_COST (1);
10742 return false;
10744 case UNSPEC:
10745 return arm_unspec_cost (x, outer_code, speed_p, cost);
10747 case PC:
10748 /* Reading the PC is like reading any other register. Writing it
10749 is more expensive, but we take that into account elsewhere. */
10750 *cost = 0;
10751 return true;
10753 case ZERO_EXTRACT:
10754 /* TODO: Simple zero_extract of bottom bits using AND. */
10755 /* Fall through. */
10756 case SIGN_EXTRACT:
10757 if (arm_arch6
10758 && mode == SImode
10759 && CONST_INT_P (XEXP (x, 1))
10760 && CONST_INT_P (XEXP (x, 2)))
10762 *cost = COSTS_N_INSNS (1);
10763 if (speed_p)
10764 *cost += extra_cost->alu.bfx;
10765 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10766 return true;
10768 /* Without UBFX/SBFX, need to resort to shift operations. */
10769 *cost = COSTS_N_INSNS (2);
10770 if (speed_p)
10771 *cost += 2 * extra_cost->alu.shift;
10772 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10773 return true;
10775 case FLOAT_EXTEND:
10776 if (TARGET_HARD_FLOAT)
10778 *cost = COSTS_N_INSNS (1);
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].widen;
10781 if (!TARGET_FPU_ARMV8
10782 && GET_MODE (XEXP (x, 0)) == HFmode)
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].widen;
10790 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10791 return true;
10794 *cost = LIBCALL_COST (1);
10795 return false;
10797 case FLOAT_TRUNCATE:
10798 if (TARGET_HARD_FLOAT)
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->fp[mode == DFmode].narrow;
10803 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10804 return true;
10805 /* Vector modes? */
10807 *cost = LIBCALL_COST (1);
10808 return false;
10810 case FMA:
10811 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10813 rtx op0 = XEXP (x, 0);
10814 rtx op1 = XEXP (x, 1);
10815 rtx op2 = XEXP (x, 2);
10817 *cost = COSTS_N_INSNS (1);
10819 /* vfms or vfnma. */
10820 if (GET_CODE (op0) == NEG)
10821 op0 = XEXP (op0, 0);
10823 /* vfnms or vfnma. */
10824 if (GET_CODE (op2) == NEG)
10825 op2 = XEXP (op2, 0);
10827 *cost += rtx_cost (op0, FMA, 0, speed_p);
10828 *cost += rtx_cost (op1, FMA, 1, speed_p);
10829 *cost += rtx_cost (op2, FMA, 2, speed_p);
10831 if (speed_p)
10832 *cost += extra_cost->fp[mode == DFmode].fma;
10834 return true;
10837 *cost = LIBCALL_COST (3);
10838 return false;
10840 case FIX:
10841 case UNSIGNED_FIX:
10842 if (TARGET_HARD_FLOAT)
10844 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10849 /* Strip off the 'cost' of rounding towards zero. */
10850 if (GET_CODE (XEXP (x, 0)) == FIX)
10851 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10852 else
10853 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10854 /* ??? Increase the cost to deal with transferring from
10855 FP -> CORE registers? */
10856 return true;
10858 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10859 && TARGET_FPU_ARMV8)
10861 *cost = COSTS_N_INSNS (1);
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].roundint;
10864 return false;
10866 /* Vector costs? */
10868 *cost = LIBCALL_COST (1);
10869 return false;
10871 case FLOAT:
10872 case UNSIGNED_FLOAT:
10873 if (TARGET_HARD_FLOAT)
10875 /* ??? Increase the cost to deal with transferring from CORE
10876 -> FP registers? */
10877 *cost = COSTS_N_INSNS (1);
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10882 *cost = LIBCALL_COST (1);
10883 return false;
10885 case CALL:
10886 *cost = COSTS_N_INSNS (1);
10887 return true;
10889 case ASM_OPERANDS:
10891 /* Just a guess. Guess number of instructions in the asm
10892 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10893 though (see PR60663). */
10894 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10895 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10897 *cost = COSTS_N_INSNS (asm_length + num_operands);
10898 return true;
10900 default:
10901 if (mode != VOIDmode)
10902 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10903 else
10904 *cost = COSTS_N_INSNS (4); /* Who knows? */
10905 return false;
10909 #undef HANDLE_NARROW_SHIFT_ARITH
10911 /* RTX costs. Dispatch to the appropriate cost model, for speed or size. */
10912 static bool
10913 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10914 int *total, bool speed)
10916 bool result;
10918 if (TARGET_OLD_RTX_COSTS
10919 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10921 /* Old way. (Deprecated.) */
10922 if (!speed)
10923 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10924 (enum rtx_code) outer_code, total);
10925 else
10926 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10927 (enum rtx_code) outer_code, total,
10928 speed);
10930 else
10932 /* New way. */
10933 if (current_tune->insn_extra_cost)
10934 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10935 (enum rtx_code) outer_code,
10936 current_tune->insn_extra_cost,
10937 total, speed);
10938 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10939 && current_tune->insn_extra_cost == NULL */
10940 else
10941 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10942 (enum rtx_code) outer_code,
10943 &generic_extra_costs, total, speed);
10946 if (dump_file && (dump_flags & TDF_DETAILS))
10948 print_rtl_single (dump_file, x);
10949 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10950 *total, result ? "final" : "partial");
10952 return result;
10955 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10956 supported on any "slowmul" cores, so it can be ignored. */
10958 static bool
10959 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10960 int *total, bool speed)
10962 enum machine_mode mode = GET_MODE (x);
10964 if (TARGET_THUMB)
10966 *total = thumb1_rtx_costs (x, code, outer_code);
10967 return true;
10970 switch (code)
10972 case MULT:
10973 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10974 || mode == DImode)
10976 *total = COSTS_N_INSNS (20);
10977 return false;
10980 if (CONST_INT_P (XEXP (x, 1)))
10982 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10983 & (unsigned HOST_WIDE_INT) 0xffffffff);
10984 int cost, const_ok = const_ok_for_arm (i);
10985 int j, booth_unit_size;
10987 /* Tune as appropriate. */
10988 cost = const_ok ? 4 : 8;
10989 booth_unit_size = 2;
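/* For example, with booth_unit_size == 2 the loop below adds one insn
   per two significant bits of the constant: multiplying by 0x5 adds 2
   and by 0xff adds 4, giving costs of 4 + 2 == 6 and 4 + 4 == 8 insns
   respectively, since both are valid immediates (const_ok). */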
10990 for (j = 0; i && j < 32; j += booth_unit_size)
10992 i >>= booth_unit_size;
10993 cost++;
10996 *total = COSTS_N_INSNS (cost);
10997 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10998 return true;
11001 *total = COSTS_N_INSNS (20);
11002 return false;
11004 default:
11005 return arm_rtx_costs_1 (x, outer_code, total, speed);
11010 /* RTX cost for cores with a fast multiply unit (M variants). */
11012 static bool
11013 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11014 int *total, bool speed)
11016 enum machine_mode mode = GET_MODE (x);
11018 if (TARGET_THUMB1)
11020 *total = thumb1_rtx_costs (x, code, outer_code);
11021 return true;
11024 /* ??? should thumb2 use different costs? */
11025 switch (code)
11027 case MULT:
11028 /* There is no point basing this on the tuning, since it is always the
11029 fast variant if it exists at all. */
11030 if (mode == DImode
11031 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11032 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11033 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11035 *total = COSTS_N_INSNS (2);
11036 return false;
11040 if (mode == DImode)
11042 *total = COSTS_N_INSNS (5);
11043 return false;
11046 if (CONST_INT_P (XEXP (x, 1)))
11048 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11049 & (unsigned HOST_WIDE_INT) 0xffffffff);
11050 int cost, const_ok = const_ok_for_arm (i);
11051 int j, booth_unit_size;
11053 /* Tune as appropriate. */
11054 cost = const_ok ? 4 : 8;
11055 booth_unit_size = 8;
11056 for (j = 0; i && j < 32; j += booth_unit_size)
11058 i >>= booth_unit_size;
11059 cost++;
11062 *total = COSTS_N_INSNS (cost);
11063 return false;
11066 if (mode == SImode)
11068 *total = COSTS_N_INSNS (4);
11069 return false;
11072 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11074 if (TARGET_HARD_FLOAT
11075 && (mode == SFmode
11076 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11078 *total = COSTS_N_INSNS (1);
11079 return false;
11083 /* Requires a lib call */
11084 *total = COSTS_N_INSNS (20);
11085 return false;
11087 default:
11088 return arm_rtx_costs_1 (x, outer_code, total, speed);
11093 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11094 so it can be ignored. */
11096 static bool
11097 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11098 int *total, bool speed)
11100 enum machine_mode mode = GET_MODE (x);
11102 if (TARGET_THUMB)
11104 *total = thumb1_rtx_costs (x, code, outer_code);
11105 return true;
11108 switch (code)
11110 case COMPARE:
11111 if (GET_CODE (XEXP (x, 0)) != MULT)
11112 return arm_rtx_costs_1 (x, outer_code, total, speed);
11114 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11115 will stall until the multiplication is complete. */
11116 *total = COSTS_N_INSNS (3);
11117 return false;
11119 case MULT:
11120 /* There is no point basing this on the tuning, since it is always the
11121 fast variant if it exists at all. */
11122 if (mode == DImode
11123 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11124 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11125 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11127 *total = COSTS_N_INSNS (2);
11128 return false;
11132 if (mode == DImode)
11134 *total = COSTS_N_INSNS (5);
11135 return false;
11138 if (CONST_INT_P (XEXP (x, 1)))
11140 /* If operand 1 is a constant we can more accurately
11141 calculate the cost of the multiply. The multiplier can
11142 retire 15 bits on the first cycle and a further 12 on the
11143 second. We do, of course, have to load the constant into
11144 a register first. */
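/* Worked example using the masks below: 0x1234 fits in the first
   cycle's 15 bits, so the cost stays at 1; 0x123456 has bits in
   0xffff8000 and costs 2; 0x12345678 also has bits in 0xf8000000 and
   costs 3. Negative values are complemented first. */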
11145 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11146 /* There's a general overhead of one cycle. */
11147 int cost = 1;
11148 unsigned HOST_WIDE_INT masked_const;
11150 if (i & 0x80000000)
11151 i = ~i;
11153 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11155 masked_const = i & 0xffff8000;
11156 if (masked_const != 0)
11158 cost++;
11159 masked_const = i & 0xf8000000;
11160 if (masked_const != 0)
11161 cost++;
11163 *total = COSTS_N_INSNS (cost);
11164 return false;
11167 if (mode == SImode)
11169 *total = COSTS_N_INSNS (3);
11170 return false;
11173 /* Requires a lib call */
11174 *total = COSTS_N_INSNS (20);
11175 return false;
11177 default:
11178 return arm_rtx_costs_1 (x, outer_code, total, speed);
11183 /* RTX costs for 9e (and later) cores. */
11185 static bool
11186 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11187 int *total, bool speed)
11189 enum machine_mode mode = GET_MODE (x);
11191 if (TARGET_THUMB1)
11193 switch (code)
11195 case MULT:
11196 *total = COSTS_N_INSNS (3);
11197 return true;
11199 default:
11200 *total = thumb1_rtx_costs (x, code, outer_code);
11201 return true;
11205 switch (code)
11207 case MULT:
11208 /* There is no point basing this on the tuning, since it is always the
11209 fast variant if it exists at all. */
11210 if (mode == DImode
11211 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11212 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11213 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11215 *total = COSTS_N_INSNS (2);
11216 return false;
11220 if (mode == DImode)
11222 *total = COSTS_N_INSNS (5);
11223 return false;
11226 if (mode == SImode)
11228 *total = COSTS_N_INSNS (2);
11229 return false;
11232 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11234 if (TARGET_HARD_FLOAT
11235 && (mode == SFmode
11236 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11238 *total = COSTS_N_INSNS (1);
11239 return false;
11243 *total = COSTS_N_INSNS (20);
11244 return false;
11246 default:
11247 return arm_rtx_costs_1 (x, outer_code, total, speed);
11250 /* All address computations that can be done are free, but rtx cost returns
11251 the same for practically all of them. So we weight the different types
11252 of address here in the order (most pref first):
11253 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
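/* For example, a post-increment address costs 0, [reg, #imm] costs 2,
   [reg, reg, lsl #2] costs 3, a plain register costs 6 and a label,
   symbol or nested memory reference costs 10. */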
11254 static inline int
11255 arm_arm_address_cost (rtx x)
11257 enum rtx_code c = GET_CODE (x);
11259 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11260 return 0;
11261 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11262 return 10;
11264 if (c == PLUS)
11266 if (CONST_INT_P (XEXP (x, 1)))
11267 return 2;
11269 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11270 return 3;
11272 return 4;
11275 return 6;
11278 static inline int
11279 arm_thumb_address_cost (rtx x)
11281 enum rtx_code c = GET_CODE (x);
11283 if (c == REG)
11284 return 1;
11285 if (c == PLUS
11286 && REG_P (XEXP (x, 0))
11287 && CONST_INT_P (XEXP (x, 1)))
11288 return 1;
11290 return 2;
11293 static int
11294 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11295 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11297 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11300 /* Adjust cost hook for XScale. */
11301 static bool
11302 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11304 /* Some true dependencies can have a higher cost depending
11305 on precisely how certain input operands are used. */
11306 if (REG_NOTE_KIND(link) == 0
11307 && recog_memoized (insn) >= 0
11308 && recog_memoized (dep) >= 0)
11310 int shift_opnum = get_attr_shift (insn);
11311 enum attr_type attr_type = get_attr_type (dep);
11313 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11314 operand for INSN. If we have a shifted input operand and the
11315 instruction we depend on is another ALU instruction, then we may
11316 have to account for an additional stall. */
11317 if (shift_opnum != 0
11318 && (attr_type == TYPE_ALU_SHIFT_IMM
11319 || attr_type == TYPE_ALUS_SHIFT_IMM
11320 || attr_type == TYPE_LOGIC_SHIFT_IMM
11321 || attr_type == TYPE_LOGICS_SHIFT_IMM
11322 || attr_type == TYPE_ALU_SHIFT_REG
11323 || attr_type == TYPE_ALUS_SHIFT_REG
11324 || attr_type == TYPE_LOGIC_SHIFT_REG
11325 || attr_type == TYPE_LOGICS_SHIFT_REG
11326 || attr_type == TYPE_MOV_SHIFT
11327 || attr_type == TYPE_MVN_SHIFT
11328 || attr_type == TYPE_MOV_SHIFT_REG
11329 || attr_type == TYPE_MVN_SHIFT_REG))
11331 rtx shifted_operand;
11332 int opno;
11334 /* Get the shifted operand. */
11335 extract_insn (insn);
11336 shifted_operand = recog_data.operand[shift_opnum];
11338 /* Iterate over all the operands in DEP. If we write an operand
11339 that overlaps with SHIFTED_OPERAND, then we have to increase the
11340 cost of this dependency. */
11341 extract_insn (dep);
11342 preprocess_constraints ();
11343 for (opno = 0; opno < recog_data.n_operands; opno++)
11345 /* We can ignore strict inputs. */
11346 if (recog_data.operand_type[opno] == OP_IN)
11347 continue;
11349 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11350 shifted_operand))
11352 *cost = 2;
11353 return false;
11358 return true;
11361 /* Adjust cost hook for Cortex A9. */
11362 static bool
11363 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11365 switch (REG_NOTE_KIND (link))
11367 case REG_DEP_ANTI:
11368 *cost = 0;
11369 return false;
11371 case REG_DEP_TRUE:
11372 case REG_DEP_OUTPUT:
11373 if (recog_memoized (insn) >= 0
11374 && recog_memoized (dep) >= 0)
11376 if (GET_CODE (PATTERN (insn)) == SET)
11378 if (GET_MODE_CLASS
11379 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11380 || GET_MODE_CLASS
11381 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11383 enum attr_type attr_type_insn = get_attr_type (insn);
11384 enum attr_type attr_type_dep = get_attr_type (dep);
11386 /* By default all dependencies of the form
11387 s0 = s0 <op> s1
11388 s0 = s0 <op> s2
11389 have an extra latency of 1 cycle because
11390 of the input and output dependency in this
11391 case. However this gets modeled as a true
11392 dependency and hence all these checks. */
11393 if (REG_P (SET_DEST (PATTERN (insn)))
11394 && REG_P (SET_DEST (PATTERN (dep)))
11395 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11396 SET_DEST (PATTERN (dep))))
11398 /* FMACS is a special case where the dependent
11399 instruction can be issued 3 cycles before
11400 the normal latency in case of an output
11401 dependency. */
11402 if ((attr_type_insn == TYPE_FMACS
11403 || attr_type_insn == TYPE_FMACD)
11404 && (attr_type_dep == TYPE_FMACS
11405 || attr_type_dep == TYPE_FMACD))
11407 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11408 *cost = insn_default_latency (dep) - 3;
11409 else
11410 *cost = insn_default_latency (dep);
11411 return false;
11413 else
11415 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11416 *cost = insn_default_latency (dep) + 1;
11417 else
11418 *cost = insn_default_latency (dep);
11420 return false;
11425 break;
11427 default:
11428 gcc_unreachable ();
11431 return true;
11434 /* Adjust cost hook for FA726TE. */
11435 static bool
11436 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11438 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11439 has a penalty of 3. */
11440 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11441 && recog_memoized (insn) >= 0
11442 && recog_memoized (dep) >= 0
11443 && get_attr_conds (dep) == CONDS_SET)
11445 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11446 if (get_attr_conds (insn) == CONDS_USE
11447 && get_attr_type (insn) != TYPE_BRANCH)
11449 *cost = 3;
11450 return false;
11453 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11454 || get_attr_conds (insn) == CONDS_USE)
11456 *cost = 0;
11457 return false;
11461 return true;
11464 /* Implement TARGET_REGISTER_MOVE_COST.
11466 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11467 such a move is typically more expensive than a single memory access. We set
11468 the cost to less than two memory accesses so that floating
11469 point to integer conversion does not go through memory. */
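/* For example, with arm_memory_move_cost returning 10 on 32-bit
   targets, the cost of 15 below keeps a VFP<->core move cheaper than
   a store/load round trip (20). */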
11472 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11473 reg_class_t from, reg_class_t to)
11475 if (TARGET_32BIT)
11477 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11478 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11479 return 15;
11480 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11481 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11482 return 4;
11483 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11484 return 20;
11485 else
11486 return 2;
11488 else
11490 if (from == HI_REGS || to == HI_REGS)
11491 return 4;
11492 else
11493 return 2;
11497 /* Implement TARGET_MEMORY_MOVE_COST. */
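/* For example, on Thumb-1 an SImode access through LO_REGS costs
   2 * 4 * 1 == 8 (the same as a sub-word access), while the same
   access through any other class costs 16; 32-bit targets use a flat
   cost of 10. */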
11500 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11501 bool in ATTRIBUTE_UNUSED)
11503 if (TARGET_32BIT)
11504 return 10;
11505 else
11507 if (GET_MODE_SIZE (mode) < 4)
11508 return 8;
11509 else
11510 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11514 /* Vectorizer cost model implementation. */
11516 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11517 static int
11518 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11519 tree vectype,
11520 int misalign ATTRIBUTE_UNUSED)
11522 unsigned elements;
11524 switch (type_of_cost)
11526 case scalar_stmt:
11527 return current_tune->vec_costs->scalar_stmt_cost;
11529 case scalar_load:
11530 return current_tune->vec_costs->scalar_load_cost;
11532 case scalar_store:
11533 return current_tune->vec_costs->scalar_store_cost;
11535 case vector_stmt:
11536 return current_tune->vec_costs->vec_stmt_cost;
11538 case vector_load:
11539 return current_tune->vec_costs->vec_align_load_cost;
11541 case vector_store:
11542 return current_tune->vec_costs->vec_store_cost;
11544 case vec_to_scalar:
11545 return current_tune->vec_costs->vec_to_scalar_cost;
11547 case scalar_to_vec:
11548 return current_tune->vec_costs->scalar_to_vec_cost;
11550 case unaligned_load:
11551 return current_tune->vec_costs->vec_unalign_load_cost;
11553 case unaligned_store:
11554 return current_tune->vec_costs->vec_unalign_store_cost;
11556 case cond_branch_taken:
11557 return current_tune->vec_costs->cond_taken_branch_cost;
11559 case cond_branch_not_taken:
11560 return current_tune->vec_costs->cond_not_taken_branch_cost;
11562 case vec_perm:
11563 case vec_promote_demote:
11564 return current_tune->vec_costs->vec_stmt_cost;
11566 case vec_construct:
11567 elements = TYPE_VECTOR_SUBPARTS (vectype);
11568 return elements / 2 + 1;
11570 default:
11571 gcc_unreachable ();
11575 /* Implement targetm.vectorize.add_stmt_cost. */
11577 static unsigned
11578 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11579 struct _stmt_vec_info *stmt_info, int misalign,
11580 enum vect_cost_model_location where)
11582 unsigned *cost = (unsigned *) data;
11583 unsigned retval = 0;
11585 if (flag_vect_cost_model)
11587 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11588 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11590 /* Statements in an inner loop relative to the loop being
11591 vectorized are weighted more heavily. The value here is
11592 arbitrary and could potentially be improved with analysis. */
11593 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11594 count *= 50; /* FIXME. */
11596 retval = (unsigned) (count * stmt_cost);
11597 cost[where] += retval;
11600 return retval;
11603 /* Return true if and only if this insn can dual-issue only as older. */
11604 static bool
11605 cortexa7_older_only (rtx insn)
11607 if (recog_memoized (insn) < 0)
11608 return false;
11610 switch (get_attr_type (insn))
11612 case TYPE_ALU_REG:
11613 case TYPE_ALUS_REG:
11614 case TYPE_LOGIC_REG:
11615 case TYPE_LOGICS_REG:
11616 case TYPE_ADC_REG:
11617 case TYPE_ADCS_REG:
11618 case TYPE_ADR:
11619 case TYPE_BFM:
11620 case TYPE_REV:
11621 case TYPE_MVN_REG:
11622 case TYPE_SHIFT_IMM:
11623 case TYPE_SHIFT_REG:
11624 case TYPE_LOAD_BYTE:
11625 case TYPE_LOAD1:
11626 case TYPE_STORE1:
11627 case TYPE_FFARITHS:
11628 case TYPE_FADDS:
11629 case TYPE_FFARITHD:
11630 case TYPE_FADDD:
11631 case TYPE_FMOV:
11632 case TYPE_F_CVT:
11633 case TYPE_FCMPS:
11634 case TYPE_FCMPD:
11635 case TYPE_FCONSTS:
11636 case TYPE_FCONSTD:
11637 case TYPE_FMULS:
11638 case TYPE_FMACS:
11639 case TYPE_FMULD:
11640 case TYPE_FMACD:
11641 case TYPE_FDIVS:
11642 case TYPE_FDIVD:
11643 case TYPE_F_MRC:
11644 case TYPE_F_MRRC:
11645 case TYPE_F_FLAG:
11646 case TYPE_F_LOADS:
11647 case TYPE_F_STORES:
11648 return true;
11649 default:
11650 return false;
11654 /* Return true if and only if this insn can dual-issue as younger. */
11655 static bool
11656 cortexa7_younger (FILE *file, int verbose, rtx insn)
11658 if (recog_memoized (insn) < 0)
11660 if (verbose > 5)
11661 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11662 return false;
11665 switch (get_attr_type (insn))
11667 case TYPE_ALU_IMM:
11668 case TYPE_ALUS_IMM:
11669 case TYPE_LOGIC_IMM:
11670 case TYPE_LOGICS_IMM:
11671 case TYPE_EXTEND:
11672 case TYPE_MVN_IMM:
11673 case TYPE_MOV_IMM:
11674 case TYPE_MOV_REG:
11675 case TYPE_MOV_SHIFT:
11676 case TYPE_MOV_SHIFT_REG:
11677 case TYPE_BRANCH:
11678 case TYPE_CALL:
11679 return true;
11680 default:
11681 return false;
11686 /* Look for an instruction that can dual issue only as an older
11687 instruction, and move it in front of any instructions that can
11688 dual-issue as younger, while preserving the relative order of all
11689 other instructions in the ready list. This is a heuristic to help
11690 dual-issue in later cycles, by postponing issue of more flexible
11691 instructions. This heuristic may affect dual issue opportunities
11692 in the current cycle. */
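/* For example, if the ready list would issue a mov-immediate
   (younger-capable) ahead of an add-register (older-only), the add is
   hoisted in front of the mov so that the mov is still available to
   fill the younger slot of a later dual-issue pair. */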
11693 static void
11694 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11695 int clock)
11697 int i;
11698 int first_older_only = -1, first_younger = -1;
11700 if (verbose > 5)
11701 fprintf (file,
11702 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11703 clock,
11704 *n_readyp);
11706 /* Traverse the ready list from the head (the instruction to issue
11707 first), and looking for the first instruction that can issue as
11708 younger and the first instruction that can dual-issue only as
11709 older. */
11710 for (i = *n_readyp - 1; i >= 0; i--)
11712 rtx insn = ready[i];
11713 if (cortexa7_older_only (insn))
11715 first_older_only = i;
11716 if (verbose > 5)
11717 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11718 break;
11720 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11721 first_younger = i;
11724 /* Nothing to reorder: either no younger insn was found, or the insn
11725 that can dual-issue only as older already appears before any insn
11726 that can dual-issue as younger. */
11727 if (first_younger == -1)
11729 if (verbose > 5)
11730 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11731 return;
11734 /* Nothing to reorder because no older-only insn in the ready list. */
11735 if (first_older_only == -1)
11737 if (verbose > 5)
11738 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11739 return;
11742 /* Move first_older_only insn before first_younger. */
11743 if (verbose > 5)
11744 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11745 INSN_UID(ready [first_older_only]),
11746 INSN_UID(ready [first_younger]));
11747 rtx first_older_only_insn = ready [first_older_only];
11748 for (i = first_older_only; i < first_younger; i++)
11750 ready[i] = ready[i+1];
11753 ready[i] = first_older_only_insn;
11754 return;
11757 /* Implement TARGET_SCHED_REORDER. */
11758 static int
11759 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11760 int clock)
11762 switch (arm_tune)
11764 case cortexa7:
11765 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11766 break;
11767 default:
11768 /* Do nothing for other cores. */
11769 break;
11772 return arm_issue_rate ();
11775 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11776 It corrects the value of COST based on the relationship between
11777 INSN and DEP through the dependence LINK. It returns the new
11778 value. There is a per-core adjust_cost hook to adjust scheduler costs
11779 and the per-core hook can choose to completely override the generic
11780 adjust_cost function. Only put bits of code into arm_adjust_cost that
11781 are common across all cores. */
11782 static int
11783 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11785 rtx i_pat, d_pat;
11787 /* When generating Thumb-1 code, we want to place flag-setting operations
11788 close to a conditional branch which depends on them, so that we can
11789 omit the comparison. */
11790 if (TARGET_THUMB1
11791 && REG_NOTE_KIND (link) == 0
11792 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11793 && recog_memoized (dep) >= 0
11794 && get_attr_conds (dep) == CONDS_SET)
11795 return 0;
11797 if (current_tune->sched_adjust_cost != NULL)
11799 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11800 return cost;
11803 /* XXX Is this strictly true? */
11804 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11805 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11806 return 0;
11808 /* Call insns don't incur a stall, even if they follow a load. */
11809 if (REG_NOTE_KIND (link) == 0
11810 && CALL_P (insn))
11811 return 1;
11813 if ((i_pat = single_set (insn)) != NULL
11814 && MEM_P (SET_SRC (i_pat))
11815 && (d_pat = single_set (dep)) != NULL
11816 && MEM_P (SET_DEST (d_pat)))
11818 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11819 /* This is a load after a store; there is no conflict if the load reads
11820 from a cached area. Assume that loads from the stack and from the
11821 constant pool are cached, and that others will miss. This is a
11822 hack. */
11824 if ((GET_CODE (src_mem) == SYMBOL_REF
11825 && CONSTANT_POOL_ADDRESS_P (src_mem))
11826 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11827 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11828 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11829 return 1;
11832 return cost;
11836 arm_max_conditional_execute (void)
11838 return max_insns_skipped;
11841 static int
11842 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11844 if (TARGET_32BIT)
11845 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11846 else
11847 return (optimize > 0) ? 2 : 0;
11850 static int
11851 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11853 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11856 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11857 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11858 sequences of non-executed instructions in IT blocks probably take the same
11859 amount of time as executed instructions (and the IT instruction itself takes
11860 space in icache). This function was experimentally determined to give good
11861 results on a popular embedded benchmark. */
11863 static int
11864 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11866 return (TARGET_32BIT && speed_p) ? 1
11867 : arm_default_branch_cost (speed_p, predictable_p);
11870 static bool fp_consts_inited = false;
11872 static REAL_VALUE_TYPE value_fp0;
11874 static void
11875 init_fp_table (void)
11877 REAL_VALUE_TYPE r;
11879 r = REAL_VALUE_ATOF ("0", DFmode);
11880 value_fp0 = r;
11881 fp_consts_inited = true;
11884 /* Return TRUE if rtx X is a valid immediate FP constant. */
11886 arm_const_double_rtx (rtx x)
11888 REAL_VALUE_TYPE r;
11890 if (!fp_consts_inited)
11891 init_fp_table ();
11893 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11894 if (REAL_VALUE_MINUS_ZERO (r))
11895 return 0;
11897 if (REAL_VALUES_EQUAL (r, value_fp0))
11898 return 1;
11900 return 0;
11903 /* VFPv3 has a fairly wide range of representable immediates, formed from
11904 "quarter-precision" floating-point values. These can be evaluated using this
11905 formula (with ^ for exponentiation):
11907 -1^s * n * 2^-r
11909 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11910 16 <= n <= 31 and 0 <= r <= 7.
11912 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11914 - A (most-significant) is the sign bit.
11915 - BCD are the exponent (encoded as r XOR 3).
11916 - EFGH are the mantissa (encoded as n - 16).
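   For example, 1.0 can be written as 16 * 2^-4 (s = 0, n = 16, r = 4), so
   its index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, while
   -0.5 = -1 * 16 * 2^-5 gives (1 << 7) | ((5 ^ 3) << 4) | 0 = 0xe0.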
11919 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11920 fconst[sd] instruction, or -1 if X isn't suitable. */
11921 static int
11922 vfp3_const_double_index (rtx x)
11924 REAL_VALUE_TYPE r, m;
11925 int sign, exponent;
11926 unsigned HOST_WIDE_INT mantissa, mant_hi;
11927 unsigned HOST_WIDE_INT mask;
11928 HOST_WIDE_INT m1, m2;
11929 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11931 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11932 return -1;
11934 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11936 /* We can't represent these things, so detect them first. */
11937 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11938 return -1;
11940 /* Extract sign, exponent and mantissa. */
11941 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11942 r = real_value_abs (&r);
11943 exponent = REAL_EXP (&r);
11944 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11945 highest (sign) bit, with a fixed binary point at bit point_pos.
11946 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11947 bits for the mantissa, this may fail (low bits would be lost). */
11948 real_ldexp (&m, &r, point_pos - exponent);
11949 REAL_VALUE_TO_INT (&m1, &m2, m);
11950 mantissa = m1;
11951 mant_hi = m2;
11953 /* If there are bits set in the low part of the mantissa, we can't
11954 represent this value. */
11955 if (mantissa != 0)
11956 return -1;
11958 /* Now make it so that mantissa contains the most-significant bits, and move
11959 the point_pos to indicate that the least-significant bits have been
11960 discarded. */
11961 point_pos -= HOST_BITS_PER_WIDE_INT;
11962 mantissa = mant_hi;
11964 /* We can permit four significant bits of mantissa only, plus a high bit
11965 which is always 1. */
11966 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11967 if ((mantissa & mask) != 0)
11968 return -1;
11970 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11971 mantissa >>= point_pos - 5;
11973 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11974 floating-point immediate zero with Neon using an integer-zero load, but
11975 that case is handled elsewhere.) */
11976 if (mantissa == 0)
11977 return -1;
11979 gcc_assert (mantissa >= 16 && mantissa <= 31);
11981 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11982 normalized significands are in the range [1, 2). (Our mantissa is shifted
11983 left 4 places at this point relative to normalized IEEE754 values). GCC
11984 internally uses [0.5, 1) (see real.c), so the exponent returned from
11985 REAL_EXP must be altered. */
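   /* As a worked example: GCC represents 1.0 internally as 0.5 * 2^1, so
      REAL_EXP is 1 and the encoded exponent becomes 5 - 1 = 4, matching
      1.0 = 16 * 2^-4 in the quarter-precision form described above.  */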
11986 exponent = 5 - exponent;
11988 if (exponent < 0 || exponent > 7)
11989 return -1;
11991 /* Sign, mantissa and exponent are now in the correct form to plug into the
11992 formula described in the comment above. */
11993 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11996 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11998 vfp3_const_double_rtx (rtx x)
12000 if (!TARGET_VFP3)
12001 return 0;
12003 return vfp3_const_double_index (x) != -1;
12006 /* Recognize immediates which can be used in various Neon instructions. Legal
12007 immediates are described by the following table (for VMVN variants, the
12008 bitwise inverse of the constant shown is recognized; in either case, VMOV
12009 is output and the correct instruction to use for a given constant is chosen
12010 by the assembler). The constant shown is replicated across all elements of
12011 the destination vector.
12013 insn elems variant constant (binary)
12014 ---- ----- ------- -----------------
12015 vmov i32 0 00000000 00000000 00000000 abcdefgh
12016 vmov i32 1 00000000 00000000 abcdefgh 00000000
12017 vmov i32 2 00000000 abcdefgh 00000000 00000000
12018 vmov i32 3 abcdefgh 00000000 00000000 00000000
12019 vmov i16 4 00000000 abcdefgh
12020 vmov i16 5 abcdefgh 00000000
12021 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12022 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12023 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12024 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12025 vmvn i16 10 00000000 abcdefgh
12026 vmvn i16 11 abcdefgh 00000000
12027 vmov i32 12 00000000 00000000 abcdefgh 11111111
12028 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12029 vmov i32 14 00000000 abcdefgh 11111111 11111111
12030 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12031 vmov i8 16 abcdefgh
12032 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12033 eeeeeeee ffffffff gggggggg hhhhhhhh
12034 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12035 vmov f32 19 00000000 00000000 00000000 00000000
12037 For case 18, B = !b. Representable values are exactly those accepted by
12038 vfp3_const_double_index, but are output as floating-point numbers rather
12039 than indices.
12041 For case 19, we will change it to vmov.i32 when assembling.
12043 Variants 0-5 (inclusive) may also be used as immediates for the second
12044 operand of VORR/VBIC instructions.
12046 The INVERSE argument causes the bitwise inverse of the given operand to be
12047 recognized instead (used for recognizing legal immediates for the VAND/VORN
12048 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12049 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12050 output, rather than the real insns vbic/vorr).
12052 INVERSE makes no difference to the recognition of float vectors.
12054 The return value is the variant of immediate as shown in the above table, or
12055 -1 if the given value doesn't match any of the listed patterns.
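   For example, a V4SImode vector whose elements all equal 0x4700 matches
   variant 1 with an element width of 32: each lane is
   00000000 00000000 01000111 00000000, so *MODCONST is set to 0x4700 and
   1 is returned.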
12057 static int
12058 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12059 rtx *modconst, int *elementwidth)
12061 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12062 matches = 1; \
12063 for (i = 0; i < idx; i += (STRIDE)) \
12064 if (!(TEST)) \
12065 matches = 0; \
12066 if (matches) \
12068 immtype = (CLASS); \
12069 elsize = (ELSIZE); \
12070 break; \
12073 unsigned int i, elsize = 0, idx = 0, n_elts;
12074 unsigned int innersize;
12075 unsigned char bytes[16];
12076 int immtype = -1, matches;
12077 unsigned int invmask = inverse ? 0xff : 0;
12078 bool vector = GET_CODE (op) == CONST_VECTOR;
12080 if (vector)
12082 n_elts = CONST_VECTOR_NUNITS (op);
12083 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12085 else
12087 n_elts = 1;
12088 if (mode == VOIDmode)
12089 mode = DImode;
12090 innersize = GET_MODE_SIZE (mode);
12093 /* Vectors of float constants. */
12094 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12096 rtx el0 = CONST_VECTOR_ELT (op, 0);
12097 REAL_VALUE_TYPE r0;
12099 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12100 return -1;
12102 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12104 for (i = 1; i < n_elts; i++)
12106 rtx elt = CONST_VECTOR_ELT (op, i);
12107 REAL_VALUE_TYPE re;
12109 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12111 if (!REAL_VALUES_EQUAL (r0, re))
12112 return -1;
12115 if (modconst)
12116 *modconst = CONST_VECTOR_ELT (op, 0);
12118 if (elementwidth)
12119 *elementwidth = 0;
12121 if (el0 == CONST0_RTX (GET_MODE (el0)))
12122 return 19;
12123 else
12124 return 18;
12127 /* Splat vector constant out into a byte vector. */
12128 for (i = 0; i < n_elts; i++)
12130 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12131 unsigned HOST_WIDE_INT elpart;
12132 unsigned int part, parts;
12134 if (CONST_INT_P (el))
12136 elpart = INTVAL (el);
12137 parts = 1;
12139 else if (CONST_DOUBLE_P (el))
12141 elpart = CONST_DOUBLE_LOW (el);
12142 parts = 2;
12144 else
12145 gcc_unreachable ();
12147 for (part = 0; part < parts; part++)
12149 unsigned int byte;
12150 for (byte = 0; byte < innersize; byte++)
12152 bytes[idx++] = (elpart & 0xff) ^ invmask;
12153 elpart >>= BITS_PER_UNIT;
12155 if (CONST_DOUBLE_P (el))
12156 elpart = CONST_DOUBLE_HIGH (el);
12160 /* Sanity check. */
12161 gcc_assert (idx == GET_MODE_SIZE (mode));
12165 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12166 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12168 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12169 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12171 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12172 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12174 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12175 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12177 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12179 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12181 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12182 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12184 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12185 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12187 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12188 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12190 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12191 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12193 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12195 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12197 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12198 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12200 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12203 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12206 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12207 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12209 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12211 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12212 && bytes[i] == bytes[(i + 8) % idx]);
12214 while (0);
12216 if (immtype == -1)
12217 return -1;
12219 if (elementwidth)
12220 *elementwidth = elsize;
12222 if (modconst)
12224 unsigned HOST_WIDE_INT imm = 0;
12226 /* Un-invert bytes of recognized vector, if necessary. */
12227 if (invmask != 0)
12228 for (i = 0; i < idx; i++)
12229 bytes[i] ^= invmask;
12231 if (immtype == 17)
12233 /* FIXME: Broken on 32-bit H_W_I hosts. */
12234 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12236 for (i = 0; i < 8; i++)
12237 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12238 << (i * BITS_PER_UNIT);
12240 *modconst = GEN_INT (imm);
12242 else
12244 unsigned HOST_WIDE_INT imm = 0;
12246 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12247 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12249 *modconst = GEN_INT (imm);
12253 return immtype;
12254 #undef CHECK
12257 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12258 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12259 float elements), and a modified constant (whatever should be output for a
12260 VMOV) in *MODCONST. */
12263 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12264 rtx *modconst, int *elementwidth)
12266 rtx tmpconst;
12267 int tmpwidth;
12268 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12270 if (retval == -1)
12271 return 0;
12273 if (modconst)
12274 *modconst = tmpconst;
12276 if (elementwidth)
12277 *elementwidth = tmpwidth;
12279 return 1;
12282 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12283 the immediate is valid, write a constant suitable for using as an operand
12284 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12285 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12288 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12289 rtx *modconst, int *elementwidth)
12291 rtx tmpconst;
12292 int tmpwidth;
12293 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12295 if (retval < 0 || retval > 5)
12296 return 0;
12298 if (modconst)
12299 *modconst = tmpconst;
12301 if (elementwidth)
12302 *elementwidth = tmpwidth;
12304 return 1;
12307 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12308 the immediate is valid, write a constant suitable for using as an operand
12309 to VSHR/VSHL to *MODCONST and the corresponding element width to
12310 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left shift or a
12311 right shift, because the two have different limitations. */
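/* For example, for a V8HImode operand (16-bit elements) every lane of OP
   must hold the same shift count; a left-shift count must be in [0, 15],
   a right-shift count in [1, 16], and *ELEMENTWIDTH is set to 16.  */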
12314 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12315 rtx *modconst, int *elementwidth,
12316 bool isleftshift)
12318 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12319 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12320 unsigned HOST_WIDE_INT last_elt = 0;
12321 unsigned HOST_WIDE_INT maxshift;
12323 /* Extract the (common) shift count, checking that all elements of the
vector hold the same constant. */
12324 for (i = 0; i < n_elts; i++)
12326 rtx el = CONST_VECTOR_ELT (op, i);
12327 unsigned HOST_WIDE_INT elpart;
12329 if (CONST_INT_P (el))
12330 elpart = INTVAL (el);
12331 else if (CONST_DOUBLE_P (el))
12332 return 0;
12333 else
12334 gcc_unreachable ();
12336 if (i != 0 && elpart != last_elt)
12337 return 0;
12339 last_elt = elpart;
12342 /* Shift less than element size. */
12343 maxshift = innersize * 8;
12345 if (isleftshift)
12347 /* Left shift immediate value can be from 0 to <size>-1. */
12348 if (last_elt >= maxshift)
12349 return 0;
12351 else
12353 /* Right shift immediate value can be from 1 to <size>. */
12354 if (last_elt == 0 || last_elt > maxshift)
12355 return 0;
12358 if (elementwidth)
12359 *elementwidth = innersize * 8;
12361 if (modconst)
12362 *modconst = CONST_VECTOR_ELT (op, 0);
12364 return 1;
12367 /* Return a string suitable for output of Neon immediate logic operation
12368 MNEM. */
12370 char *
12371 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12372 int inverse, int quad)
12374 int width, is_valid;
12375 static char templ[40];
12377 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12379 gcc_assert (is_valid != 0);
12381 if (quad)
12382 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12383 else
12384 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12386 return templ;
12389 /* Return a string suitable for output of Neon immediate shift operation
12390 (VSHR or VSHL) MNEM. */
12392 char *
12393 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12394 enum machine_mode mode, int quad,
12395 bool isleftshift)
12397 int width, is_valid;
12398 static char templ[40];
12400 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12401 gcc_assert (is_valid != 0);
12403 if (quad)
12404 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12405 else
12406 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12408 return templ;
12411 /* Output a sequence of pairwise operations to implement a reduction.
12412 NOTE: We do "too much work" here, because pairwise operations work on two
12413 registers-worth of operands in one go. Unfortunately we can't exploit those
12414 extra calculations to do the full operation in fewer steps, I don't think.
12415 Although all vector elements of the result but the first are ignored, we
12416 actually calculate the same result in each of the elements. An alternative
12417 such as initially loading a vector with zero to use as each of the second
12418 operands would use up an additional register and take an extra instruction,
12419 for no particular gain. */
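/* For example, reducing a V4SFmode vector takes two pairwise steps (two
   calls to REDUC, e.g. a VPADD for a sum reduction): the first folds the
   four elements into two partial results, the second folds those, leaving
   the full reduction in element 0 of OP0.  */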
12421 void
12422 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12423 rtx (*reduc) (rtx, rtx, rtx))
12425 enum machine_mode inner = GET_MODE_INNER (mode);
12426 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12427 rtx tmpsum = op1;
12429 for (i = parts / 2; i >= 1; i /= 2)
12431 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12432 emit_insn (reduc (dest, tmpsum, tmpsum));
12433 tmpsum = dest;
12437 /* If VALS is a vector constant that can be loaded into a register
12438 using VDUP, generate instructions to do so and return an RTX to
12439 assign to the register. Otherwise return NULL_RTX. */
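/* For example, a V8QImode constant { 7, 7, 7, 7, 7, 7, 7, 7 } is handled
   by copying 7 into a core register and returning
   (vec_duplicate:V8QI (reg)), so a single VDUP of that register
   initializes the whole vector.  */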
12441 static rtx
12442 neon_vdup_constant (rtx vals)
12444 enum machine_mode mode = GET_MODE (vals);
12445 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12446 int n_elts = GET_MODE_NUNITS (mode);
12447 bool all_same = true;
12448 rtx x;
12449 int i;
12451 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12452 return NULL_RTX;
12454 for (i = 0; i < n_elts; ++i)
12456 x = XVECEXP (vals, 0, i);
12457 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12458 all_same = false;
12461 if (!all_same)
12462 /* The elements are not all the same. We could handle repeating
12463 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12464 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12465 vdup.i16). */
12466 return NULL_RTX;
12468 /* We can load this constant by using VDUP and a constant in a
12469 single ARM register. This will be cheaper than a vector
12470 load. */
12472 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12473 return gen_rtx_VEC_DUPLICATE (mode, x);
12476 /* Generate code to load VALS, which is a PARALLEL containing only
12477 constants (for vec_init) or CONST_VECTOR, efficiently into a
12478 register. Returns an RTX to copy into the register, or NULL_RTX
12479 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12482 neon_make_constant (rtx vals)
12484 enum machine_mode mode = GET_MODE (vals);
12485 rtx target;
12486 rtx const_vec = NULL_RTX;
12487 int n_elts = GET_MODE_NUNITS (mode);
12488 int n_const = 0;
12489 int i;
12491 if (GET_CODE (vals) == CONST_VECTOR)
12492 const_vec = vals;
12493 else if (GET_CODE (vals) == PARALLEL)
12495 /* A CONST_VECTOR must contain only CONST_INTs and
12496 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12497 Only store valid constants in a CONST_VECTOR. */
12498 for (i = 0; i < n_elts; ++i)
12500 rtx x = XVECEXP (vals, 0, i);
12501 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12502 n_const++;
12504 if (n_const == n_elts)
12505 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12507 else
12508 gcc_unreachable ();
12510 if (const_vec != NULL
12511 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12512 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12513 return const_vec;
12514 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12515 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12516 pipeline cycle; creating the constant takes one or two ARM
12517 pipeline cycles. */
12518 return target;
12519 else if (const_vec != NULL_RTX)
12520 /* Load from constant pool. On Cortex-A8 this takes two cycles
12521 (for either double or quad vectors). We can not take advantage
12522 of single-cycle VLD1 because we need a PC-relative addressing
12523 mode. */
12524 return const_vec;
12525 else
12526 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12527 We can not construct an initializer. */
12528 return NULL_RTX;
12531 /* Initialize vector TARGET to VALS. */
12533 void
12534 neon_expand_vector_init (rtx target, rtx vals)
12536 enum machine_mode mode = GET_MODE (target);
12537 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12538 int n_elts = GET_MODE_NUNITS (mode);
12539 int n_var = 0, one_var = -1;
12540 bool all_same = true;
12541 rtx x, mem;
12542 int i;
12544 for (i = 0; i < n_elts; ++i)
12546 x = XVECEXP (vals, 0, i);
12547 if (!CONSTANT_P (x))
12548 ++n_var, one_var = i;
12550 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12551 all_same = false;
12554 if (n_var == 0)
12556 rtx constant = neon_make_constant (vals);
12557 if (constant != NULL_RTX)
12559 emit_move_insn (target, constant);
12560 return;
12564 /* Splat a single non-constant element if we can. */
12565 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12567 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12568 emit_insn (gen_rtx_SET (VOIDmode, target,
12569 gen_rtx_VEC_DUPLICATE (mode, x)));
12570 return;
12573 /* One field is non-constant. Load constant then overwrite varying
12574 field. This is more efficient than using the stack. */
12575 if (n_var == 1)
12577 rtx copy = copy_rtx (vals);
12578 rtx index = GEN_INT (one_var);
12580 /* Load constant part of vector, substitute neighboring value for
12581 varying element. */
12582 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12583 neon_expand_vector_init (target, copy);
12585 /* Insert variable. */
12586 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12587 switch (mode)
12589 case V8QImode:
12590 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12591 break;
12592 case V16QImode:
12593 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12594 break;
12595 case V4HImode:
12596 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12597 break;
12598 case V8HImode:
12599 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12600 break;
12601 case V2SImode:
12602 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12603 break;
12604 case V4SImode:
12605 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12606 break;
12607 case V2SFmode:
12608 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12609 break;
12610 case V4SFmode:
12611 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12612 break;
12613 case V2DImode:
12614 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12615 break;
12616 default:
12617 gcc_unreachable ();
12619 return;
12622 /* Construct the vector in memory one field at a time
12623 and load the whole vector. */
12624 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12625 for (i = 0; i < n_elts; i++)
12626 emit_move_insn (adjust_address_nv (mem, inner_mode,
12627 i * GET_MODE_SIZE (inner_mode)),
12628 XVECEXP (vals, 0, i));
12629 emit_move_insn (target, mem);
12632 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12633 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12634 reported source locations are bogus. */
12636 static void
12637 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12638 const char *err)
12640 HOST_WIDE_INT lane;
12642 gcc_assert (CONST_INT_P (operand));
12644 lane = INTVAL (operand);
12646 if (lane < low || lane >= high)
12647 error (err);
12650 /* Bounds-check lanes. */
12652 void
12653 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12655 bounds_check (operand, low, high, "lane out of range");
12658 /* Bounds-check constants. */
12660 void
12661 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12663 bounds_check (operand, low, high, "constant out of range");
12666 HOST_WIDE_INT
12667 neon_element_bits (enum machine_mode mode)
12669 if (mode == DImode)
12670 return GET_MODE_BITSIZE (mode);
12671 else
12672 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12676 /* Predicates for `match_operand' and `match_operator'. */
12678 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12679 WB is true if full writeback address modes are allowed and is false
12680 if limited writeback address modes (POST_INC and PRE_DEC) are
12681 allowed. */
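/* For example, (mem (reg)) and (mem (plus (reg) (const_int N))) are
   accepted when N is a multiple of 4 in the range -1020 to 1020, which is
   the offset range of the coprocessor load/store instructions.  */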
12684 arm_coproc_mem_operand (rtx op, bool wb)
12686 rtx ind;
12688 /* Reject eliminable registers. */
12689 if (! (reload_in_progress || reload_completed || lra_in_progress)
12690 && ( reg_mentioned_p (frame_pointer_rtx, op)
12691 || reg_mentioned_p (arg_pointer_rtx, op)
12692 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12693 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12694 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12695 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12696 return FALSE;
12698 /* Constants are converted into offsets from labels. */
12699 if (!MEM_P (op))
12700 return FALSE;
12702 ind = XEXP (op, 0);
12704 if (reload_completed
12705 && (GET_CODE (ind) == LABEL_REF
12706 || (GET_CODE (ind) == CONST
12707 && GET_CODE (XEXP (ind, 0)) == PLUS
12708 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12709 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12710 return TRUE;
12712 /* Match: (mem (reg)). */
12713 if (REG_P (ind))
12714 return arm_address_register_rtx_p (ind, 0);
12716 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12717 acceptable in any case (subject to verification by
12718 arm_address_register_rtx_p). We need WB to be true to accept
12719 PRE_INC and POST_DEC. */
12720 if (GET_CODE (ind) == POST_INC
12721 || GET_CODE (ind) == PRE_DEC
12722 || (wb
12723 && (GET_CODE (ind) == PRE_INC
12724 || GET_CODE (ind) == POST_DEC)))
12725 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12727 if (wb
12728 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12729 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12730 && GET_CODE (XEXP (ind, 1)) == PLUS
12731 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12732 ind = XEXP (ind, 1);
12734 /* Match:
12735 (plus (reg)
12736 (const)). */
12737 if (GET_CODE (ind) == PLUS
12738 && REG_P (XEXP (ind, 0))
12739 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12740 && CONST_INT_P (XEXP (ind, 1))
12741 && INTVAL (XEXP (ind, 1)) > -1024
12742 && INTVAL (XEXP (ind, 1)) < 1024
12743 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12744 return TRUE;
12746 return FALSE;
12749 /* Return TRUE if OP is a memory operand which we can load or store a vector
12750 to/from. TYPE is one of the following values:
12751 0 - Vector load/store (vldr)
12752 1 - Core registers (ldm)
12753 2 - Element/structure loads (vld1)
12756 neon_vector_mem_operand (rtx op, int type, bool strict)
12758 rtx ind;
12760 /* Reject eliminable registers. */
12761 if (! (reload_in_progress || reload_completed)
12762 && ( reg_mentioned_p (frame_pointer_rtx, op)
12763 || reg_mentioned_p (arg_pointer_rtx, op)
12764 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12765 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12766 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12767 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12768 return !strict;
12770 /* Constants are converted into offsets from labels. */
12771 if (!MEM_P (op))
12772 return FALSE;
12774 ind = XEXP (op, 0);
12776 if (reload_completed
12777 && (GET_CODE (ind) == LABEL_REF
12778 || (GET_CODE (ind) == CONST
12779 && GET_CODE (XEXP (ind, 0)) == PLUS
12780 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12781 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12782 return TRUE;
12784 /* Match: (mem (reg)). */
12785 if (REG_P (ind))
12786 return arm_address_register_rtx_p (ind, 0);
12788 /* Allow post-increment with Neon registers. */
12789 if ((type != 1 && GET_CODE (ind) == POST_INC)
12790 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12791 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12793 /* FIXME: vld1 allows register post-modify. */
12795 /* Match:
12796 (plus (reg)
12797 (const)). */
12798 if (type == 0
12799 && GET_CODE (ind) == PLUS
12800 && REG_P (XEXP (ind, 0))
12801 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12802 && CONST_INT_P (XEXP (ind, 1))
12803 && INTVAL (XEXP (ind, 1)) > -1024
12804 /* For quad modes, we restrict the constant offset to be slightly less
12805 than what the instruction format permits. We have no such constraint
12806 on double mode offsets. (This must match arm_legitimate_index_p.) */
12807 && (INTVAL (XEXP (ind, 1))
12808 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12809 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12810 return TRUE;
12812 return FALSE;
12815 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12816 type. */
12818 neon_struct_mem_operand (rtx op)
12820 rtx ind;
12822 /* Reject eliminable registers. */
12823 if (! (reload_in_progress || reload_completed)
12824 && ( reg_mentioned_p (frame_pointer_rtx, op)
12825 || reg_mentioned_p (arg_pointer_rtx, op)
12826 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12827 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12828 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12829 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12830 return FALSE;
12832 /* Constants are converted into offsets from labels. */
12833 if (!MEM_P (op))
12834 return FALSE;
12836 ind = XEXP (op, 0);
12838 if (reload_completed
12839 && (GET_CODE (ind) == LABEL_REF
12840 || (GET_CODE (ind) == CONST
12841 && GET_CODE (XEXP (ind, 0)) == PLUS
12842 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12843 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12844 return TRUE;
12846 /* Match: (mem (reg)). */
12847 if (REG_P (ind))
12848 return arm_address_register_rtx_p (ind, 0);
12850 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12851 if (GET_CODE (ind) == POST_INC
12852 || GET_CODE (ind) == PRE_DEC)
12853 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12855 return FALSE;
12858 /* Return true if X is a register that will be eliminated later on. */
12860 arm_eliminable_register (rtx x)
12862 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12863 || REGNO (x) == ARG_POINTER_REGNUM
12864 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12865 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12868 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12869 coprocessor registers. Otherwise return NO_REGS. */
12871 enum reg_class
12872 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12874 if (mode == HFmode)
12876 if (!TARGET_NEON_FP16)
12877 return GENERAL_REGS;
12878 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12879 return NO_REGS;
12880 return GENERAL_REGS;
12883 /* The neon move patterns handle all legitimate vector and struct
12884 addresses. */
12885 if (TARGET_NEON
12886 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12887 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12888 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12889 || VALID_NEON_STRUCT_MODE (mode)))
12890 return NO_REGS;
12892 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12893 return NO_REGS;
12895 return GENERAL_REGS;
12898 /* Values which must be returned in the most-significant end of the return
12899 register. */
12901 static bool
12902 arm_return_in_msb (const_tree valtype)
12904 return (TARGET_AAPCS_BASED
12905 && BYTES_BIG_ENDIAN
12906 && (AGGREGATE_TYPE_P (valtype)
12907 || TREE_CODE (valtype) == COMPLEX_TYPE
12908 || FIXED_POINT_TYPE_P (valtype)));
12911 /* Return TRUE if X references a SYMBOL_REF. */
12913 symbol_mentioned_p (rtx x)
12915 const char * fmt;
12916 int i;
12918 if (GET_CODE (x) == SYMBOL_REF)
12919 return 1;
12921 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12922 are constant offsets, not symbols. */
12923 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12924 return 0;
12926 fmt = GET_RTX_FORMAT (GET_CODE (x));
12928 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12930 if (fmt[i] == 'E')
12932 int j;
12934 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12935 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12936 return 1;
12938 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12939 return 1;
12942 return 0;
12945 /* Return TRUE if X references a LABEL_REF. */
12947 label_mentioned_p (rtx x)
12949 const char * fmt;
12950 int i;
12952 if (GET_CODE (x) == LABEL_REF)
12953 return 1;
12955 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12956 instruction, but they are constant offsets, not symbols. */
12957 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12958 return 0;
12960 fmt = GET_RTX_FORMAT (GET_CODE (x));
12961 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12963 if (fmt[i] == 'E')
12965 int j;
12967 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12968 if (label_mentioned_p (XVECEXP (x, i, j)))
12969 return 1;
12971 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12972 return 1;
12975 return 0;
12979 tls_mentioned_p (rtx x)
12981 switch (GET_CODE (x))
12983 case CONST:
12984 return tls_mentioned_p (XEXP (x, 0));
12986 case UNSPEC:
12987 if (XINT (x, 1) == UNSPEC_TLS)
12988 return 1;
12990 default:
12991 return 0;
12995 /* Must not copy any rtx that uses a pc-relative address. */
12997 static int
12998 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13000 if (GET_CODE (*x) == UNSPEC
13001 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13002 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13003 return 1;
13004 return 0;
13007 static bool
13008 arm_cannot_copy_insn_p (rtx insn)
13010 /* The tls call insn cannot be copied, as it is paired with a data
13011 word. */
13012 if (recog_memoized (insn) == CODE_FOR_tlscall)
13013 return true;
13015 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13018 enum rtx_code
13019 minmax_code (rtx x)
13021 enum rtx_code code = GET_CODE (x);
13023 switch (code)
13025 case SMAX:
13026 return GE;
13027 case SMIN:
13028 return LE;
13029 case UMIN:
13030 return LEU;
13031 case UMAX:
13032 return GEU;
13033 default:
13034 gcc_unreachable ();
13038 /* Match pair of min/max operators that can be implemented via usat/ssat. */
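/* For example, bounds 0 and 255 yield *MASK == 8 and *SIGNED_SAT == false
   (a USAT #8 range), while bounds -128 and 127 yield *MASK == 8 and
   *SIGNED_SAT == true (an SSAT #8 range).  */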
13040 bool
13041 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13042 int *mask, bool *signed_sat)
13044 /* The high bound must be a power of two minus one. */
13045 int log = exact_log2 (INTVAL (hi_bound) + 1);
13046 if (log == -1)
13047 return false;
13049 /* The low bound is either zero (for usat) or one less than the
13050 negation of the high bound (for ssat). */
13051 if (INTVAL (lo_bound) == 0)
13053 if (mask)
13054 *mask = log;
13055 if (signed_sat)
13056 *signed_sat = false;
13058 return true;
13061 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13063 if (mask)
13064 *mask = log + 1;
13065 if (signed_sat)
13066 *signed_sat = true;
13068 return true;
13071 return false;
13074 /* Return 1 if memory locations are adjacent. */
13076 adjacent_mem_locations (rtx a, rtx b)
13078 /* We don't guarantee to preserve the order of these memory refs. */
13079 if (volatile_refs_p (a) || volatile_refs_p (b))
13080 return 0;
13082 if ((REG_P (XEXP (a, 0))
13083 || (GET_CODE (XEXP (a, 0)) == PLUS
13084 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13085 && (REG_P (XEXP (b, 0))
13086 || (GET_CODE (XEXP (b, 0)) == PLUS
13087 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13089 HOST_WIDE_INT val0 = 0, val1 = 0;
13090 rtx reg0, reg1;
13091 int val_diff;
13093 if (GET_CODE (XEXP (a, 0)) == PLUS)
13095 reg0 = XEXP (XEXP (a, 0), 0);
13096 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13098 else
13099 reg0 = XEXP (a, 0);
13101 if (GET_CODE (XEXP (b, 0)) == PLUS)
13103 reg1 = XEXP (XEXP (b, 0), 0);
13104 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13106 else
13107 reg1 = XEXP (b, 0);
13109 /* Don't accept any offset that will require multiple
13110 instructions to handle, since this would cause the
13111 arith_adjacentmem pattern to output an overlong sequence. */
13112 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13113 return 0;
13115 /* Don't allow an eliminable register: register elimination can make
13116 the offset too large. */
13117 if (arm_eliminable_register (reg0))
13118 return 0;
13120 val_diff = val1 - val0;
13122 if (arm_ld_sched)
13124 /* If the target has load delay slots, then there's no benefit
13125 to using an ldm instruction unless the offset is zero and
13126 we are optimizing for size. */
13127 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13128 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13129 && (val_diff == 4 || val_diff == -4));
13132 return ((REGNO (reg0) == REGNO (reg1))
13133 && (val_diff == 4 || val_diff == -4));
13136 return 0;
13139 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13140 for load operations, false for store operations. CONSECUTIVE is true
13141 if the register numbers in the operation must be consecutive in the register
13142 bank. RETURN_PC is true if the value is to be loaded into the PC.
13143 The pattern we are trying to match for load is:
13144 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13145 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13148 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13150 where
13151 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13152 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13153 3. If consecutive is TRUE, then for kth register being loaded,
13154 REGNO (R_dk) = REGNO (R_d0) + k.
13155 The pattern for store is similar. */
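/* For example, with MODE == SImode a three-register load could be the
   PARALLEL
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))
      (set (reg:SI 6) (mem:SI (plus:SI (reg:SI 0) (const_int 8))))]
   which satisfies conditions 1-3 above with <reg_increment> == 4.  */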
13156 bool
13157 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13158 bool consecutive, bool return_pc)
13160 HOST_WIDE_INT count = XVECLEN (op, 0);
13161 rtx reg, mem, addr;
13162 unsigned regno;
13163 unsigned first_regno;
13164 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13165 rtx elt;
13166 bool addr_reg_in_reglist = false;
13167 bool update = false;
13168 int reg_increment;
13169 int offset_adj;
13170 int regs_per_val;
13172 /* If not in SImode, then registers must be consecutive
13173 (e.g., VLDM instructions for DFmode). */
13174 gcc_assert ((mode == SImode) || consecutive);
13175 /* Setting return_pc for stores is illegal. */
13176 gcc_assert (!return_pc || load);
13178 /* Set up the increments and the regs per val based on the mode. */
13179 reg_increment = GET_MODE_SIZE (mode);
13180 regs_per_val = reg_increment / 4;
13181 offset_adj = return_pc ? 1 : 0;
13183 if (count <= 1
13184 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13185 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13186 return false;
13188 /* Check if this is a write-back. */
13189 elt = XVECEXP (op, 0, offset_adj);
13190 if (GET_CODE (SET_SRC (elt)) == PLUS)
13192 i++;
13193 base = 1;
13194 update = true;
13196 /* The offset adjustment must be the number of registers being
13197 popped times the size of a single register. */
13198 if (!REG_P (SET_DEST (elt))
13199 || !REG_P (XEXP (SET_SRC (elt), 0))
13200 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13201 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13202 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13203 ((count - 1 - offset_adj) * reg_increment))
13204 return false;
13207 i = i + offset_adj;
13208 base = base + offset_adj;
13209 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13210 success depends on the type: VLDM can do just one reg,
13211 LDM must do at least two. */
13212 if ((count <= i) && (mode == SImode))
13213 return false;
13215 elt = XVECEXP (op, 0, i - 1);
13216 if (GET_CODE (elt) != SET)
13217 return false;
13219 if (load)
13221 reg = SET_DEST (elt);
13222 mem = SET_SRC (elt);
13224 else
13226 reg = SET_SRC (elt);
13227 mem = SET_DEST (elt);
13230 if (!REG_P (reg) || !MEM_P (mem))
13231 return false;
13233 regno = REGNO (reg);
13234 first_regno = regno;
13235 addr = XEXP (mem, 0);
13236 if (GET_CODE (addr) == PLUS)
13238 if (!CONST_INT_P (XEXP (addr, 1)))
13239 return false;
13241 offset = INTVAL (XEXP (addr, 1));
13242 addr = XEXP (addr, 0);
13245 if (!REG_P (addr))
13246 return false;
13248 /* Don't allow SP to be loaded unless it is also the base register. It
13249 guarantees that SP is reset correctly when an LDM instruction
13250 is interrupted. Otherwise, we might end up with a corrupt stack. */
13251 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13252 return false;
13254 for (; i < count; i++)
13256 elt = XVECEXP (op, 0, i);
13257 if (GET_CODE (elt) != SET)
13258 return false;
13260 if (load)
13262 reg = SET_DEST (elt);
13263 mem = SET_SRC (elt);
13265 else
13267 reg = SET_SRC (elt);
13268 mem = SET_DEST (elt);
13271 if (!REG_P (reg)
13272 || GET_MODE (reg) != mode
13273 || REGNO (reg) <= regno
13274 || (consecutive
13275 && (REGNO (reg) !=
13276 (unsigned int) (first_regno + regs_per_val * (i - base))))
13277 /* Don't allow SP to be loaded unless it is also the base register. It
13278 guarantees that SP is reset correctly when an LDM instruction
13279 is interrupted. Otherwise, we might end up with a corrupt stack. */
13280 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13281 || !MEM_P (mem)
13282 || GET_MODE (mem) != mode
13283 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13284 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13285 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13286 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13287 offset + (i - base) * reg_increment))
13288 && (!REG_P (XEXP (mem, 0))
13289 || offset + (i - base) * reg_increment != 0)))
13290 return false;
13292 regno = REGNO (reg);
13293 if (regno == REGNO (addr))
13294 addr_reg_in_reglist = true;
13297 if (load)
13299 if (update && addr_reg_in_reglist)
13300 return false;
13302 /* For Thumb-1, the address register is always modified, either by write-back
13303 or by explicit load. If the pattern does not describe an update,
13304 then the address register must be in the list of loaded registers. */
13305 if (TARGET_THUMB1)
13306 return update || addr_reg_in_reglist;
13309 return true;
13312 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13313 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13314 instruction. ADD_OFFSET is nonzero if the base address register needs
13315 to be modified with an add instruction before we can use it. */
13317 static bool
13318 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13319 int nops, HOST_WIDE_INT add_offset)
13321 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13322 if the offset isn't small enough. The reason 2 ldrs are faster
13323 is because these ARMs are able to do more than one cache access
13324 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13325 whilst the ARM8 has a double bandwidth cache. This means that
13326 these cores can do both an instruction fetch and a data fetch in
13327 a single cycle, so the trick of calculating the address into a
13328 scratch register (one of the result regs) and then doing a load
13329 multiple actually becomes slower (and no smaller in code size).
13330 That is the transformation
13332 ldr rd1, [rbase + offset]
13333 ldr rd2, [rbase + offset + 4]
   to
13337 add rd1, rbase, offset
13338 ldmia rd1, {rd1, rd2}
13340 produces worse code -- '3 cycles + any stalls on rd2' instead of
13341 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13342 access per cycle, the first sequence could never complete in less
13343 than 6 cycles, whereas the ldm sequence would only take 5 and
13344 would make better use of sequential accesses if not hitting the
13345 cache.
13347 We cheat here and test 'arm_ld_sched' which we currently know to
13348 only be true for the ARM8, ARM9 and StrongARM. If this ever
13349 changes, then the test below needs to be reworked. */
13350 if (nops == 2 && arm_ld_sched && add_offset != 0)
13351 return false;
13353 /* XScale has load-store double instructions, but they have stricter
13354 alignment requirements than load-store multiple, so we cannot
13355 use them.
13357 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13358 the pipeline until completion.
13360 NREGS CYCLES
     1     3
     2     4
     3     5
     4     6
13366 An ldr instruction takes 1-3 cycles, but does not block the
13367 pipeline.
13369 NREGS CYCLES
13370 1 1-3
13371 2 2-6
13372 3 3-9
13373 4 4-12
13375 Best case ldr will always win. However, the more ldr instructions
13376 we issue, the less likely we are to be able to schedule them well.
13377 Using ldr instructions also increases code size.
13379 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13380 for counts of 3 or 4 regs. */
13381 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13382 return false;
13383 return true;
13386 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13387 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13388 an array ORDER which describes the sequence to use when accessing the
13389 offsets that produces an ascending order. In this sequence, each
13390 offset must be larger by exactly 4 than the previous one. ORDER[0]
13391 must have been filled in with the lowest offset by the caller.
13392 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13393 we use to verify that ORDER produces an ascending order of registers.
13394 Return true if it was possible to construct such an order, false if
13395 not. */
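/* For example, with NOPS == 4 and UNSORTED_OFFSETS == {8, 4, 12, 0} the
   caller sets ORDER[0] = 3 (the lowest offset); this function then fills
   ORDER with {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12 in ascending order.  */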
13397 static bool
13398 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13399 int *unsorted_regs)
13401 int i;
13402 for (i = 1; i < nops; i++)
13404 int j;
13406 order[i] = order[i - 1];
13407 for (j = 0; j < nops; j++)
13408 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13410 /* We must find exactly one offset that is higher than the
13411 previous one by 4. */
13412 if (order[i] != order[i - 1])
13413 return false;
13414 order[i] = j;
13416 if (order[i] == order[i - 1])
13417 return false;
13418 /* The register numbers must be ascending. */
13419 if (unsorted_regs != NULL
13420 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13421 return false;
13423 return true;
13426 /* Used to determine in a peephole whether a sequence of load
13427 instructions can be changed into a load-multiple instruction.
13428 NOPS is the number of separate load instructions we are examining. The
13429 first NOPS entries in OPERANDS are the destination registers, the
13430 next NOPS entries are memory operands. If this function is
13431 successful, *BASE is set to the common base register of the memory
13432 accesses; *LOAD_OFFSET is set to the first memory location's offset
13433 from that base register.
13434 REGS is an array filled in with the destination register numbers.
13435 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13436 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13437 the sequence of registers in REGS matches the loads from ascending memory
13438 locations, and the function verifies that the register numbers are
13439 themselves ascending. If CHECK_REGS is false, the register numbers
13440 are stored in the order they are found in the operands. */
13441 static int
13442 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13443 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13445 int unsorted_regs[MAX_LDM_STM_OPS];
13446 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13447 int order[MAX_LDM_STM_OPS];
13448 rtx base_reg_rtx = NULL;
13449 int base_reg = -1;
13450 int i, ldm_case;
13452 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13453 easily extended if required. */
13454 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13456 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13458 /* Loop over the operands and check that the memory references are
13459 suitable (i.e. immediate offsets from the same base register). At
13460 the same time, extract the target register, and the memory
13461 offsets. */
13462 for (i = 0; i < nops; i++)
13464 rtx reg;
13465 rtx offset;
13467 /* Convert a subreg of a mem into the mem itself. */
13468 if (GET_CODE (operands[nops + i]) == SUBREG)
13469 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13471 gcc_assert (MEM_P (operands[nops + i]));
13473 /* Don't reorder volatile memory references; it doesn't seem worth
13474 looking for the case where the order is ok anyway. */
13475 if (MEM_VOLATILE_P (operands[nops + i]))
13476 return 0;
13478 offset = const0_rtx;
13480 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13481 || (GET_CODE (reg) == SUBREG
13482 && REG_P (reg = SUBREG_REG (reg))))
13483 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13484 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13485 || (GET_CODE (reg) == SUBREG
13486 && REG_P (reg = SUBREG_REG (reg))))
13487 && (CONST_INT_P (offset
13488 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13490 if (i == 0)
13492 base_reg = REGNO (reg);
13493 base_reg_rtx = reg;
13494 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13495 return 0;
13497 else if (base_reg != (int) REGNO (reg))
13498 /* Not addressed from the same base register. */
13499 return 0;
13501 unsorted_regs[i] = (REG_P (operands[i])
13502 ? REGNO (operands[i])
13503 : REGNO (SUBREG_REG (operands[i])));
13505 /* If it isn't an integer register, or if it overwrites the
13506 base register but isn't the last insn in the list, then
13507 we can't do this. */
13508 if (unsorted_regs[i] < 0
13509 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13510 || unsorted_regs[i] > 14
13511 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13512 return 0;
13514 /* Don't allow SP to be loaded unless it is also the base
13515 register. It guarantees that SP is reset correctly when
13516 an LDM instruction is interrupted. Otherwise, we might
13517 end up with a corrupt stack. */
13518 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13519 return 0;
13521 unsorted_offsets[i] = INTVAL (offset);
13522 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13523 order[0] = i;
13525 else
13526 /* Not a suitable memory address. */
13527 return 0;
13530 /* All the useful information has now been extracted from the
13531 operands into unsorted_regs and unsorted_offsets; additionally,
13532 order[0] has been set to the lowest offset in the list. Sort
13533 the offsets into order, verifying that they are adjacent, and
13534 check that the register numbers are ascending. */
13535 if (!compute_offset_order (nops, unsorted_offsets, order,
13536 check_regs ? unsorted_regs : NULL))
13537 return 0;
13539 if (saved_order)
13540 memcpy (saved_order, order, sizeof order);
13542 if (base)
13544 *base = base_reg;
13546 for (i = 0; i < nops; i++)
13547 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13549 *load_offset = unsorted_offsets[order[0]];
13552 if (TARGET_THUMB1
13553 && !peep2_reg_dead_p (nops, base_reg_rtx))
13554 return 0;
13556 if (unsorted_offsets[order[0]] == 0)
13557 ldm_case = 1; /* ldmia */
13558 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13559 ldm_case = 2; /* ldmib */
13560 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13561 ldm_case = 3; /* ldmda */
13562 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13563 ldm_case = 4; /* ldmdb */
13564 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13565 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13566 ldm_case = 5;
13567 else
13568 return 0;
13570 if (!multiple_operation_profitable_p (false, nops,
13571 ldm_case == 5
13572 ? unsorted_offsets[order[0]] : 0))
13573 return 0;
13575 return ldm_case;
13578 /* Used to determine in a peephole whether a sequence of store instructions can
13579 be changed into a store-multiple instruction.
13580 NOPS is the number of separate store instructions we are examining.
13581 NOPS_TOTAL is the total number of instructions recognized by the peephole
13582 pattern.
13583 The first NOPS entries in OPERANDS are the source registers, the next
13584 NOPS entries are memory operands. If this function is successful, *BASE is
13585 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13586 to the first memory location's offset from that base register. REGS is an
13587 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13588 likewise filled with the corresponding rtx's.
13589 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13590 numbers to an ascending order of stores.
13591 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13592 from ascending memory locations, and the function verifies that the register
13593 numbers are themselves ascending. If CHECK_REGS is false, the register
13594 numbers are stored in the order they are found in the operands. */
13595 static int
13596 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13597 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13598 HOST_WIDE_INT *load_offset, bool check_regs)
13600 int unsorted_regs[MAX_LDM_STM_OPS];
13601 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13602 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13603 int order[MAX_LDM_STM_OPS];
13604 int base_reg = -1;
13605 rtx base_reg_rtx = NULL;
13606 int i, stm_case;
13608 /* Write back of base register is currently only supported for Thumb 1. */
13609 int base_writeback = TARGET_THUMB1;
13611 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13612 easily extended if required. */
13613 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13615 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13617 /* Loop over the operands and check that the memory references are
13618 suitable (i.e. immediate offsets from the same base register). At
13619 the same time, extract the target register, and the memory
13620 offsets. */
13621 for (i = 0; i < nops; i++)
13623 rtx reg;
13624 rtx offset;
13626 /* Convert a subreg of a mem into the mem itself. */
13627 if (GET_CODE (operands[nops + i]) == SUBREG)
13628 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13630 gcc_assert (MEM_P (operands[nops + i]));
13632 /* Don't reorder volatile memory references; it doesn't seem worth
13633 looking for the case where the order is ok anyway. */
13634 if (MEM_VOLATILE_P (operands[nops + i]))
13635 return 0;
13637 offset = const0_rtx;
13639 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13640 || (GET_CODE (reg) == SUBREG
13641 && REG_P (reg = SUBREG_REG (reg))))
13642 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13643 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13644 || (GET_CODE (reg) == SUBREG
13645 && REG_P (reg = SUBREG_REG (reg))))
13646 && (CONST_INT_P (offset
13647 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13649 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13650 ? operands[i] : SUBREG_REG (operands[i]));
13651 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13653 if (i == 0)
13655 base_reg = REGNO (reg);
13656 base_reg_rtx = reg;
13657 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13658 return 0;
13660 else if (base_reg != (int) REGNO (reg))
13661 /* Not addressed from the same base register. */
13662 return 0;
13664 /* If it isn't an integer register, then we can't do this. */
13665 if (unsorted_regs[i] < 0
13666 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13667 /* The effects are unpredictable if the base register is
13668 both updated and stored. */
13669 || (base_writeback && unsorted_regs[i] == base_reg)
13670 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13671 || unsorted_regs[i] > 14)
13672 return 0;
13674 unsorted_offsets[i] = INTVAL (offset);
13675 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13676 order[0] = i;
13678 else
13679 /* Not a suitable memory address. */
13680 return 0;
13683 /* All the useful information has now been extracted from the
13684 operands into unsorted_regs and unsorted_offsets; additionally,
13685 order[0] has been set to the lowest offset in the list. Sort
13686 the offsets into order, verifying that they are adjacent, and
13687 check that the register numbers are ascending. */
13688 if (!compute_offset_order (nops, unsorted_offsets, order,
13689 check_regs ? unsorted_regs : NULL))
13690 return 0;
13692 if (saved_order)
13693 memcpy (saved_order, order, sizeof order);
13695 if (base)
13697 *base = base_reg;
13699 for (i = 0; i < nops; i++)
13701 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13702 if (reg_rtxs)
13703 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13706 *load_offset = unsorted_offsets[order[0]];
13709 if (TARGET_THUMB1
13710 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13711 return 0;
13713 if (unsorted_offsets[order[0]] == 0)
13714 stm_case = 1; /* stmia */
13715 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13716 stm_case = 2; /* stmib */
13717 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13718 stm_case = 3; /* stmda */
13719 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13720 stm_case = 4; /* stmdb */
13721 else
13722 return 0;
13724 if (!multiple_operation_profitable_p (false, nops, 0))
13725 return 0;
13727 return stm_case;
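/* Illustrative sketch, standalone: what SAVED_ORDER holds after a successful
   match.  order[k] is the index of the insn whose access lands at the k-th
   lowest address; the index selection below is a simplified stand-in for
   compute_offset_order (defined elsewhere in this file), which additionally
   checks that the offsets are adjacent words.  */
#include <stdio.h>

int
main (void)
{
  long offsets[4] = { 8, 0, 12, 4 };   /* per-insn offsets from the base  */
  int order[4];
  int nops = 4, used = 0;

  for (int k = 0; k < nops; k++)       /* pick the smallest unused offset  */
    {
      int best = -1;
      for (int i = 0; i < nops; i++)
        if (!((used >> i) & 1) && (best < 0 || offsets[i] < offsets[best]))
          best = i;
      order[k] = best;
      used |= 1 << best;
    }

  for (int k = 0; k < nops; k++)
    printf ("slot %d <- insn %d (offset %ld)\n", k, order[k], offsets[order[k]]);
  return 0;
}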
13730 /* Routines for use in generating RTL. */
13732 /* Generate a load-multiple instruction. COUNT is the number of loads in
13733 the instruction; REGS and MEMS are arrays containing the operands.
13734 BASEREG is the base register to be used in addressing the memory operands.
13735 WBACK_OFFSET is nonzero if the instruction should update the base
13736 register. */
13738 static rtx
13739 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13740 HOST_WIDE_INT wback_offset)
13742 int i = 0, j;
13743 rtx result;
13745 if (!multiple_operation_profitable_p (false, count, 0))
13747 rtx seq;
13749 start_sequence ();
13751 for (i = 0; i < count; i++)
13752 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13754 if (wback_offset != 0)
13755 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13757 seq = get_insns ();
13758 end_sequence ();
13760 return seq;
13763 result = gen_rtx_PARALLEL (VOIDmode,
13764 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13765 if (wback_offset != 0)
13767 XVECEXP (result, 0, 0)
13768 = gen_rtx_SET (VOIDmode, basereg,
13769 plus_constant (Pmode, basereg, wback_offset));
13770 i = 1;
13771 count++;
13774 for (j = 0; i < count; i++, j++)
13775 XVECEXP (result, 0, i)
13776 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13778 return result;
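/* Illustrative sketch, standalone: the shape of the PARALLEL built above for
   a write-back load-multiple.  Element 0 is the base-register update and the
   remaining elements are one register load each; print_ldm_parallel is a
   local name for this sketch only.  For count == 3, regs == {4,5,6} and
   wback_offset == 12 it prints the conceptual pattern
   (set base (plus base 12)) (set r4 mem[0]) (set r5 mem[1]) (set r6 mem[2]).  */
#include <stdio.h>

static void
print_ldm_parallel (int count, const int *regs, long wback_offset)
{
  int i = 0;
  if (wback_offset != 0)
    {
      printf ("(set base (plus base %ld))\n", wback_offset);
      i = 1;
      count++;
    }
  for (int j = 0; i < count; i++, j++)
    printf ("(set r%d mem[%d])\n", regs[j], j);
}

int
main (void)
{
  int regs[3] = { 4, 5, 6 };
  print_ldm_parallel (3, regs, 12);
  return 0;
}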
13781 /* Generate a store-multiple instruction. COUNT is the number of stores in
13782 the instruction; REGS and MEMS are arrays containing the operands.
13783 BASEREG is the base register to be used in addressing the memory operands.
13784 WBACK_OFFSET is nonzero if the instruction should update the base
13785 register. */
13787 static rtx
13788 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13789 HOST_WIDE_INT wback_offset)
13791 int i = 0, j;
13792 rtx result;
13794 if (GET_CODE (basereg) == PLUS)
13795 basereg = XEXP (basereg, 0);
13797 if (!multiple_operation_profitable_p (false, count, 0))
13799 rtx seq;
13801 start_sequence ();
13803 for (i = 0; i < count; i++)
13804 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13806 if (wback_offset != 0)
13807 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13809 seq = get_insns ();
13810 end_sequence ();
13812 return seq;
13815 result = gen_rtx_PARALLEL (VOIDmode,
13816 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13817 if (wback_offset != 0)
13819 XVECEXP (result, 0, 0)
13820 = gen_rtx_SET (VOIDmode, basereg,
13821 plus_constant (Pmode, basereg, wback_offset));
13822 i = 1;
13823 count++;
13826 for (j = 0; i < count; i++, j++)
13827 XVECEXP (result, 0, i)
13828 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13830 return result;
13833 /* Generate either a load-multiple or a store-multiple instruction. This
13834 function can be used in situations where we can start with a single MEM
13835 rtx and adjust its address upwards.
13836 COUNT is the number of operations in the instruction, not counting a
13837 possible update of the base register. REGS is an array containing the
13838 register operands.
13839 BASEREG is the base register to be used in addressing the memory operands,
13840 which are constructed from BASEMEM.
13841 WRITE_BACK specifies whether the generated instruction should include an
13842 update of the base register.
13843 OFFSETP is used to pass an offset to and from this function; this offset
13844 is not used when constructing the address (instead BASEMEM should have an
13845 appropriate offset in its address), it is used only for setting
13846 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13848 static rtx
13849 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13850 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13852 rtx mems[MAX_LDM_STM_OPS];
13853 HOST_WIDE_INT offset = *offsetp;
13854 int i;
13856 gcc_assert (count <= MAX_LDM_STM_OPS);
13858 if (GET_CODE (basereg) == PLUS)
13859 basereg = XEXP (basereg, 0);
13861 for (i = 0; i < count; i++)
13863 rtx addr = plus_constant (Pmode, basereg, i * 4);
13864 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13865 offset += 4;
13868 if (write_back)
13869 *offsetp = offset;
13871 if (is_load)
13872 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13873 write_back ? 4 * count : 0);
13874 else
13875 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13876 write_back ? 4 * count : 0);
13879 rtx
13880 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,

13881 rtx basemem, HOST_WIDE_INT *offsetp)
13883 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13884 offsetp);
13887 rtx
13888 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13889 rtx basemem, HOST_WIDE_INT *offsetp)
13891 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13892 offsetp);
13895 /* Called from a peephole2 expander to turn a sequence of loads into an
13896 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13897 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13898 is true if we can reorder the registers because they are used commutatively
13899 subsequently.
13900 Returns true iff we could generate a new instruction. */
13902 bool
13903 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13905 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13906 rtx mems[MAX_LDM_STM_OPS];
13907 int i, j, base_reg;
13908 rtx base_reg_rtx;
13909 HOST_WIDE_INT offset;
13910 int write_back = FALSE;
13911 int ldm_case;
13912 rtx addr;
13914 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13915 &base_reg, &offset, !sort_regs);
13917 if (ldm_case == 0)
13918 return false;
13920 if (sort_regs)
13921 for (i = 0; i < nops - 1; i++)
13922 for (j = i + 1; j < nops; j++)
13923 if (regs[i] > regs[j])
13925 int t = regs[i];
13926 regs[i] = regs[j];
13927 regs[j] = t;
13929 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13931 if (TARGET_THUMB1)
13933 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13934 gcc_assert (ldm_case == 1 || ldm_case == 5);
13935 write_back = TRUE;
13938 if (ldm_case == 5)
13940 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13941 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13942 offset = 0;
13943 if (!TARGET_THUMB1)
13945 base_reg = regs[0];
13946 base_reg_rtx = newbase;
13950 for (i = 0; i < nops; i++)
13952 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13953 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13954 SImode, addr, 0);
13956 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13957 write_back ? offset + i * 4 : 0));
13958 return true;
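/* Illustrative sketch, standalone: after the loop above the index i equals
   NOPS, so the write-back amount passed to arm_gen_load_multiple_1 is
   offset + 4 * nops, leaving the base register just past the last word
   loaded.  The numbers below are only an example.  */
#include <stdio.h>

int
main (void)
{
  int nops = 4;
  long offset = 0;
  printf ("write-back amount: %ld\n", offset + 4L * nops);   /* prints 16  */
  return 0;
}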
13961 /* Called from a peephole2 expander to turn a sequence of stores into an
13962 STM instruction. OPERANDS are the operands found by the peephole matcher;
13963 NOPS indicates how many separate stores we are trying to combine.
13964 Returns true iff we could generate a new instruction. */
13966 bool
13967 gen_stm_seq (rtx *operands, int nops)
13969 int i;
13970 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13971 rtx mems[MAX_LDM_STM_OPS];
13972 int base_reg;
13973 rtx base_reg_rtx;
13974 HOST_WIDE_INT offset;
13975 int write_back = FALSE;
13976 int stm_case;
13977 rtx addr;
13978 bool base_reg_dies;
13980 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13981 mem_order, &base_reg, &offset, true);
13983 if (stm_case == 0)
13984 return false;
13986 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13988 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13989 if (TARGET_THUMB1)
13991 gcc_assert (base_reg_dies);
13992 write_back = TRUE;
13995 if (stm_case == 5)
13997 gcc_assert (base_reg_dies);
13998 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13999 offset = 0;
14002 addr = plus_constant (Pmode, base_reg_rtx, offset);
14004 for (i = 0; i < nops; i++)
14006 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14007 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14008 SImode, addr, 0);
14010 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14011 write_back ? offset + i * 4 : 0));
14012 return true;
14015 /* Called from a peephole2 expander to turn a sequence of stores that are
14016 preceded by constant loads into an STM instruction. OPERANDS are the
14017 operands found by the peephole matcher; NOPS indicates how many
14018 separate stores we are trying to combine; there are 2 * NOPS
14019 instructions in the peephole.
14020 Returns true iff we could generate a new instruction. */
14022 bool
14023 gen_const_stm_seq (rtx *operands, int nops)
14025 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14026 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14027 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14028 rtx mems[MAX_LDM_STM_OPS];
14029 int base_reg;
14030 rtx base_reg_rtx;
14031 HOST_WIDE_INT offset;
14032 int write_back = FALSE;
14033 int stm_case;
14034 rtx addr;
14035 bool base_reg_dies;
14036 int i, j;
14037 HARD_REG_SET allocated;
14039 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14040 mem_order, &base_reg, &offset, false);
14042 if (stm_case == 0)
14043 return false;
14045 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14047 /* If the same register is used more than once, try to find a free
14048 register. */
14049 CLEAR_HARD_REG_SET (allocated);
14050 for (i = 0; i < nops; i++)
14052 for (j = i + 1; j < nops; j++)
14053 if (regs[i] == regs[j])
14055 rtx t = peep2_find_free_register (0, nops * 2,
14056 TARGET_THUMB1 ? "l" : "r",
14057 SImode, &allocated);
14058 if (t == NULL_RTX)
14059 return false;
14060 reg_rtxs[i] = t;
14061 regs[i] = REGNO (t);
14065 /* Compute an ordering that maps the register numbers to an ascending
14066 sequence. */
14067 reg_order[0] = 0;
14068 for (i = 0; i < nops; i++)
14069 if (regs[i] < regs[reg_order[0]])
14070 reg_order[0] = i;
14072 for (i = 1; i < nops; i++)
14074 int this_order = reg_order[i - 1];
14075 for (j = 0; j < nops; j++)
14076 if (regs[j] > regs[reg_order[i - 1]]
14077 && (this_order == reg_order[i - 1]
14078 || regs[j] < regs[this_order]))
14079 this_order = j;
14080 reg_order[i] = this_order;
14083 /* Ensure that registers that must be live after the instruction end
14084 up with the correct value. */
14085 for (i = 0; i < nops; i++)
14087 int this_order = reg_order[i];
14088 if ((this_order != mem_order[i]
14089 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14090 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14091 return false;
14094 /* Load the constants. */
14095 for (i = 0; i < nops; i++)
14097 rtx op = operands[2 * nops + mem_order[i]];
14098 sorted_regs[i] = regs[reg_order[i]];
14099 emit_move_insn (reg_rtxs[reg_order[i]], op);
14102 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14104 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14105 if (TARGET_THUMB1)
14107 gcc_assert (base_reg_dies);
14108 write_back = TRUE;
14111 if (stm_case == 5)
14113 gcc_assert (base_reg_dies);
14114 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14115 offset = 0;
14118 addr = plus_constant (Pmode, base_reg_rtx, offset);
14120 for (i = 0; i < nops; i++)
14122 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14123 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14124 SImode, addr, 0);
14126 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14127 write_back ? offset + i * 4 : 0));
14128 return true;
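/* Illustrative sketch, standalone: the two reg_order loops above compute, for
   each position, the index of the next-larger register number without
   modifying regs[] (an index-returning selection sort), so that
   regs[reg_order[0]] <= regs[reg_order[1]] <= ...  */
#include <stdio.h>

int
main (void)
{
  int regs[4] = { 5, 2, 7, 3 };
  int reg_order[4];
  int nops = 4;

  reg_order[0] = 0;
  for (int i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (int i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (int j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1] || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  for (int i = 0; i < nops; i++)
    printf ("reg_order[%d] = %d (r%d)\n", i, reg_order[i], regs[reg_order[i]]);
  return 0;
}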
14131 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14132 unaligned copies on processors which support unaligned semantics for those
14133 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14134 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14135 An interleave factor of 1 (the minimum) will perform no interleaving.
14136 Load/store multiple are used for aligned addresses where possible. */
14138 static void
14139 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14140 HOST_WIDE_INT length,
14141 unsigned int interleave_factor)
14143 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14144 int *regnos = XALLOCAVEC (int, interleave_factor);
14145 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14146 HOST_WIDE_INT i, j;
14147 HOST_WIDE_INT remaining = length, words;
14148 rtx halfword_tmp = NULL, byte_tmp = NULL;
14149 rtx dst, src;
14150 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14151 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14152 HOST_WIDE_INT srcoffset, dstoffset;
14153 HOST_WIDE_INT src_autoinc, dst_autoinc;
14154 rtx mem, addr;
14156 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14158 /* Use hard registers if we have aligned source or destination so we can use
14159 load/store multiple with contiguous registers. */
14160 if (dst_aligned || src_aligned)
14161 for (i = 0; i < interleave_factor; i++)
14162 regs[i] = gen_rtx_REG (SImode, i);
14163 else
14164 for (i = 0; i < interleave_factor; i++)
14165 regs[i] = gen_reg_rtx (SImode);
14167 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14168 src = copy_addr_to_reg (XEXP (srcbase, 0));
14170 srcoffset = dstoffset = 0;
14172 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14173 For copying the last bytes we want to subtract this offset again. */
14174 src_autoinc = dst_autoinc = 0;
14176 for (i = 0; i < interleave_factor; i++)
14177 regnos[i] = i;
14179 /* Copy BLOCK_SIZE_BYTES chunks. */
14181 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14183 /* Load words. */
14184 if (src_aligned && interleave_factor > 1)
14186 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14187 TRUE, srcbase, &srcoffset));
14188 src_autoinc += UNITS_PER_WORD * interleave_factor;
14190 else
14192 for (j = 0; j < interleave_factor; j++)
14194 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14195 - src_autoinc));
14196 mem = adjust_automodify_address (srcbase, SImode, addr,
14197 srcoffset + j * UNITS_PER_WORD);
14198 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14200 srcoffset += block_size_bytes;
14203 /* Store words. */
14204 if (dst_aligned && interleave_factor > 1)
14206 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14207 TRUE, dstbase, &dstoffset));
14208 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14210 else
14212 for (j = 0; j < interleave_factor; j++)
14214 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14215 - dst_autoinc));
14216 mem = adjust_automodify_address (dstbase, SImode, addr,
14217 dstoffset + j * UNITS_PER_WORD);
14218 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14220 dstoffset += block_size_bytes;
14223 remaining -= block_size_bytes;
14226 /* Copy any whole words left (note these aren't interleaved with any
14227 subsequent halfword/byte load/stores in the interests of simplicity). */
14229 words = remaining / UNITS_PER_WORD;
14231 gcc_assert (words < interleave_factor);
14233 if (src_aligned && words > 1)
14235 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14236 &srcoffset));
14237 src_autoinc += UNITS_PER_WORD * words;
14239 else
14241 for (j = 0; j < words; j++)
14243 addr = plus_constant (Pmode, src,
14244 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14245 mem = adjust_automodify_address (srcbase, SImode, addr,
14246 srcoffset + j * UNITS_PER_WORD);
14247 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14249 srcoffset += words * UNITS_PER_WORD;
14252 if (dst_aligned && words > 1)
14254 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14255 &dstoffset));
14256 dst_autoinc += words * UNITS_PER_WORD;
14258 else
14260 for (j = 0; j < words; j++)
14262 addr = plus_constant (Pmode, dst,
14263 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14264 mem = adjust_automodify_address (dstbase, SImode, addr,
14265 dstoffset + j * UNITS_PER_WORD);
14266 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14268 dstoffset += words * UNITS_PER_WORD;
14271 remaining -= words * UNITS_PER_WORD;
14273 gcc_assert (remaining < 4);
14275 /* Copy a halfword if necessary. */
14277 if (remaining >= 2)
14279 halfword_tmp = gen_reg_rtx (SImode);
14281 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14282 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14283 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14285 /* Either write out immediately, or delay until we've loaded the last
14286 byte, depending on interleave factor. */
14287 if (interleave_factor == 1)
14289 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14290 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14291 emit_insn (gen_unaligned_storehi (mem,
14292 gen_lowpart (HImode, halfword_tmp)));
14293 halfword_tmp = NULL;
14294 dstoffset += 2;
14297 remaining -= 2;
14298 srcoffset += 2;
14301 gcc_assert (remaining < 2);
14303 /* Copy last byte. */
14305 if ((remaining & 1) != 0)
14307 byte_tmp = gen_reg_rtx (SImode);
14309 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14310 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14311 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14313 if (interleave_factor == 1)
14315 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14316 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14317 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14318 byte_tmp = NULL;
14319 dstoffset++;
14322 remaining--;
14323 srcoffset++;
14326 /* Store last halfword if we haven't done so already. */
14328 if (halfword_tmp)
14330 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14331 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14332 emit_insn (gen_unaligned_storehi (mem,
14333 gen_lowpart (HImode, halfword_tmp)));
14334 dstoffset += 2;
14337 /* Likewise for last byte. */
14339 if (byte_tmp)
14341 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14342 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14343 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14344 dstoffset++;
14347 gcc_assert (remaining == 0 && srcoffset == dstoffset);
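/* Illustrative sketch, standalone: how a copy length decomposes under the
   scheme above, assuming 4-byte words.  Whole blocks of
   interleave_factor * 4 bytes come first, then any remaining whole words,
   then at most one halfword and one byte.  */
#include <stdio.h>

int
main (void)
{
  long length = 23;
  int interleave_factor = 2;
  long block = 4L * interleave_factor;

  long blocks = length / block;
  long rest = length % block;
  long words = rest / 4;
  rest %= 4;
  long halfwords = rest / 2;
  long bytes = rest % 2;

  printf ("%ld bytes: %ld block(s) of %ld, %ld word(s), %ld halfword, %ld byte\n",
          length, blocks, block, words, halfwords, bytes);
  /* 23 bytes: 2 block(s) of 8, 1 word(s), 1 halfword, 1 byte  */
  return 0;
}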
14350 /* From mips_adjust_block_mem:
14352 Helper function for doing a loop-based block operation on memory
14353 reference MEM. Each iteration of the loop will operate on LENGTH
14354 bytes of MEM.
14356 Create a new base register for use within the loop and point it to
14357 the start of MEM. Create a new memory reference that uses this
14358 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14360 static void
14361 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14362 rtx *loop_mem)
14364 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14366 /* Although the new mem does not refer to a known location,
14367 it does keep up to LENGTH bytes of alignment. */
14368 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14369 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14372 /* From mips_block_move_loop:
14374 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14375 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14376 the memory regions do not overlap. */
14378 static void
14379 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14380 unsigned int interleave_factor,
14381 HOST_WIDE_INT bytes_per_iter)
14383 rtx label, src_reg, dest_reg, final_src, test;
14384 HOST_WIDE_INT leftover;
14386 leftover = length % bytes_per_iter;
14387 length -= leftover;
14389 /* Create registers and memory references for use within the loop. */
14390 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14391 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14393 /* Calculate the value that SRC_REG should have after the last iteration of
14394 the loop. */
14395 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14396 0, 0, OPTAB_WIDEN);
14398 /* Emit the start of the loop. */
14399 label = gen_label_rtx ();
14400 emit_label (label);
14402 /* Emit the loop body. */
14403 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14404 interleave_factor);
14406 /* Move on to the next block. */
14407 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14408 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14410 /* Emit the loop condition. */
14411 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14412 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14414 /* Mop up any left-over bytes. */
14415 if (leftover)
14416 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
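/* Illustrative sketch, standalone: the loop above copies
   length - (length % bytes_per_iter) bytes and the trailing straight-line
   copy handles the remainder.  */
#include <stdio.h>

int
main (void)
{
  long length = 100, bytes_per_iter = 16;
  long leftover = length % bytes_per_iter;
  printf ("loop: %ld bytes in %ld iterations, then %ld leftover\n",
          length - leftover, (length - leftover) / bytes_per_iter, leftover);
  /* loop: 96 bytes in 6 iterations, then 4 leftover  */
  return 0;
}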
14419 /* Emit a block move when either the source or destination is unaligned (not
14420 aligned to a four-byte boundary). This may need further tuning depending on
14421 core type, optimize_size setting, etc. */
14423 static int
14424 arm_movmemqi_unaligned (rtx *operands)
14426 HOST_WIDE_INT length = INTVAL (operands[2]);
14428 if (optimize_size)
14430 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14431 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14432 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14433 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14434 or dst_aligned though: allow more interleaving in those cases since the
14435 resulting code can be smaller. */
14436 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14437 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14439 if (length > 12)
14440 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14441 interleave_factor, bytes_per_iter);
14442 else
14443 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14444 interleave_factor);
14446 else
14448 /* Note that the loop created by arm_block_move_unaligned_loop may be
14449 subject to loop unrolling, which makes tuning this condition a little
14450 redundant. */
14451 if (length > 32)
14452 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14453 else
14454 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14457 return 1;
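/* Illustrative sketch, standalone: the parameter choices made above.  When
   optimising for size the cut-over to a loop is 12 bytes, with interleave
   factor 2 and 8 bytes per iteration if either buffer is word-aligned
   (1 and 4 otherwise); when optimising for speed the cut-over is 32 bytes,
   with factor 4 and 16 bytes per iteration.  */
#include <stdbool.h>
#include <stdio.h>

int
main (void)
{
  bool optimize_size = true, src_or_dst_aligned = true;
  long length = 20;

  unsigned interleave = optimize_size ? (src_or_dst_aligned ? 2 : 1) : 4;
  long per_iter = optimize_size ? (src_or_dst_aligned ? 8 : 4) : 16;
  long threshold = optimize_size ? 12 : 32;

  printf ("%s copy, interleave %u, %ld bytes/iter\n",
          length > threshold ? "looped" : "straight", interleave, per_iter);
  /* looped copy, interleave 2, 8 bytes/iter  */
  return 0;
}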
14460 int
14461 arm_gen_movmemqi (rtx *operands)
14463 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14464 HOST_WIDE_INT srcoffset, dstoffset;
14465 int i;
14466 rtx src, dst, srcbase, dstbase;
14467 rtx part_bytes_reg = NULL;
14468 rtx mem;
14470 if (!CONST_INT_P (operands[2])
14471 || !CONST_INT_P (operands[3])
14472 || INTVAL (operands[2]) > 64)
14473 return 0;
14475 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14476 return arm_movmemqi_unaligned (operands);
14478 if (INTVAL (operands[3]) & 3)
14479 return 0;
14481 dstbase = operands[0];
14482 srcbase = operands[1];
14484 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14485 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14487 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14488 out_words_to_go = INTVAL (operands[2]) / 4;
14489 last_bytes = INTVAL (operands[2]) & 3;
14490 dstoffset = srcoffset = 0;
14492 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14493 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14495 for (i = 0; in_words_to_go >= 2; i+=4)
14497 if (in_words_to_go > 4)
14498 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14499 TRUE, srcbase, &srcoffset));
14500 else
14501 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14502 src, FALSE, srcbase,
14503 &srcoffset));
14505 if (out_words_to_go)
14507 if (out_words_to_go > 4)
14508 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14509 TRUE, dstbase, &dstoffset));
14510 else if (out_words_to_go != 1)
14511 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14512 out_words_to_go, dst,
14513 (last_bytes == 0
14514 ? FALSE : TRUE),
14515 dstbase, &dstoffset));
14516 else
14518 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14519 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14520 if (last_bytes != 0)
14522 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14523 dstoffset += 4;
14528 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14529 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14532 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14533 if (out_words_to_go)
14535 rtx sreg;
14537 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14538 sreg = copy_to_reg (mem);
14540 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14541 emit_move_insn (mem, sreg);
14542 in_words_to_go--;
14544 gcc_assert (!in_words_to_go); /* Sanity check */
14547 if (in_words_to_go)
14549 gcc_assert (in_words_to_go > 0);
14551 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14552 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14555 gcc_assert (!last_bytes || part_bytes_reg);
14557 if (BYTES_BIG_ENDIAN && last_bytes)
14559 rtx tmp = gen_reg_rtx (SImode);
14561 /* The bytes we want are in the top end of the word. */
14562 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14563 GEN_INT (8 * (4 - last_bytes))));
14564 part_bytes_reg = tmp;
14566 while (last_bytes)
14568 mem = adjust_automodify_address (dstbase, QImode,
14569 plus_constant (Pmode, dst,
14570 last_bytes - 1),
14571 dstoffset + last_bytes - 1);
14572 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14574 if (--last_bytes)
14576 tmp = gen_reg_rtx (SImode);
14577 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14578 part_bytes_reg = tmp;
14583 else
14585 if (last_bytes > 1)
14587 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14588 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14589 last_bytes -= 2;
14590 if (last_bytes)
14592 rtx tmp = gen_reg_rtx (SImode);
14593 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14594 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14595 part_bytes_reg = tmp;
14596 dstoffset += 2;
14600 if (last_bytes)
14602 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14603 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14607 return 1;
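/* Illustrative sketch, standalone: the split into word transfers and trailing
   bytes used above, and the shift that brings those trailing bytes down from
   the top of the last word on big-endian targets.  */
#include <stdio.h>

int
main (void)
{
  long n = 14;
  long words = n / 4;              /* out_words_to_go  */
  long last_bytes = n & 3;         /* trailing bytes   */
  long be_shift = 8 * (4 - last_bytes);

  printf ("%ld bytes -> %ld words + %ld bytes; big-endian shift %ld bits\n",
          n, words, last_bytes, be_shift);
  /* 14 bytes -> 3 words + 2 bytes; big-endian shift 16 bits  */
  return 0;
}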
14610 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14611 by mode size. */
14612 inline static rtx
14613 next_consecutive_mem (rtx mem)
14615 enum machine_mode mode = GET_MODE (mem);
14616 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14617 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14619 return adjust_automodify_address (mem, mode, addr, offset);
14622 /* Copy using LDRD/STRD instructions whenever possible.
14623 Returns true upon success. */
14624 bool
14625 gen_movmem_ldrd_strd (rtx *operands)
14627 unsigned HOST_WIDE_INT len;
14628 HOST_WIDE_INT align;
14629 rtx src, dst, base;
14630 rtx reg0;
14631 bool src_aligned, dst_aligned;
14632 bool src_volatile, dst_volatile;
14634 gcc_assert (CONST_INT_P (operands[2]));
14635 gcc_assert (CONST_INT_P (operands[3]));
14637 len = UINTVAL (operands[2]);
14638 if (len > 64)
14639 return false;
14641 /* Maximum alignment we can assume for both src and dst buffers. */
14642 align = INTVAL (operands[3]);
14644 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14645 return false;
14647 /* Place src and dst addresses in registers
14648 and update the corresponding mem rtx. */
14649 dst = operands[0];
14650 dst_volatile = MEM_VOLATILE_P (dst);
14651 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14652 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14653 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14655 src = operands[1];
14656 src_volatile = MEM_VOLATILE_P (src);
14657 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14658 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14659 src = adjust_automodify_address (src, VOIDmode, base, 0);
14661 if (!unaligned_access && !(src_aligned && dst_aligned))
14662 return false;
14664 if (src_volatile || dst_volatile)
14665 return false;
14667 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14668 if (!(dst_aligned || src_aligned))
14669 return arm_gen_movmemqi (operands);
14671 src = adjust_address (src, DImode, 0);
14672 dst = adjust_address (dst, DImode, 0);
14673 while (len >= 8)
14675 len -= 8;
14676 reg0 = gen_reg_rtx (DImode);
14677 if (src_aligned)
14678 emit_move_insn (reg0, src);
14679 else
14680 emit_insn (gen_unaligned_loaddi (reg0, src));
14682 if (dst_aligned)
14683 emit_move_insn (dst, reg0);
14684 else
14685 emit_insn (gen_unaligned_storedi (dst, reg0));
14687 src = next_consecutive_mem (src);
14688 dst = next_consecutive_mem (dst);
14691 gcc_assert (len < 8);
14692 if (len >= 4)
14694 /* More than a word but less than a double-word to copy. Copy a word. */
14695 reg0 = gen_reg_rtx (SImode);
14696 src = adjust_address (src, SImode, 0);
14697 dst = adjust_address (dst, SImode, 0);
14698 if (src_aligned)
14699 emit_move_insn (reg0, src);
14700 else
14701 emit_insn (gen_unaligned_loadsi (reg0, src));
14703 if (dst_aligned)
14704 emit_move_insn (dst, reg0);
14705 else
14706 emit_insn (gen_unaligned_storesi (dst, reg0));
14708 src = next_consecutive_mem (src);
14709 dst = next_consecutive_mem (dst);
14710 len -= 4;
14713 if (len == 0)
14714 return true;
14716 /* Copy the remaining bytes. */
14717 if (len >= 2)
14719 dst = adjust_address (dst, HImode, 0);
14720 src = adjust_address (src, HImode, 0);
14721 reg0 = gen_reg_rtx (SImode);
14722 if (src_aligned)
14723 emit_insn (gen_zero_extendhisi2 (reg0, src));
14724 else
14725 emit_insn (gen_unaligned_loadhiu (reg0, src));
14727 if (dst_aligned)
14728 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14729 else
14730 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14732 src = next_consecutive_mem (src);
14733 dst = next_consecutive_mem (dst);
14734 if (len == 2)
14735 return true;
14738 dst = adjust_address (dst, QImode, 0);
14739 src = adjust_address (src, QImode, 0);
14740 reg0 = gen_reg_rtx (QImode);
14741 emit_move_insn (reg0, src);
14742 emit_move_insn (dst, reg0);
14743 return true;
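/* Illustrative sketch, standalone: the copy above proceeds in 8-byte
   (LDRD/STRD), 4-byte, 2-byte and finally 1-byte steps.  */
#include <stdio.h>

int
main (void)
{
  unsigned long len = 27;
  unsigned long dwords = len / 8;
  len %= 8;
  unsigned long word = len >= 4;
  len -= 4 * word;
  unsigned long half = len >= 2;
  len -= 2 * half;
  unsigned long byte = len;

  printf ("%lu doubleword(s), %lu word, %lu halfword, %lu byte\n",
          dwords, word, half, byte);
  /* 3 doubleword(s), 0 word, 1 halfword, 1 byte  */
  return 0;
}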
14746 /* Select a dominance comparison mode if possible for a test of the general
14747 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14748 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14749 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14750 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14751 In all cases OP will be either EQ or NE, but we don't need to know which
14752 here. If we are unable to support a dominance comparison we return
14753 CC mode. This will then fail to match for the RTL expressions that
14754 generate this call. */
14755 enum machine_mode
14756 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14758 enum rtx_code cond1, cond2;
14759 int swapped = 0;
14761 /* Currently we will probably get the wrong result if the individual
14762 comparisons are not simple. This also ensures that it is safe to
14763 reverse a comparison if necessary. */
14764 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14765 != CCmode)
14766 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14767 != CCmode))
14768 return CCmode;
14770 /* The if_then_else variant of this tests the second condition if the
14771 first passes, but is true if the first fails. Reverse the first
14772 condition to get a true "inclusive-or" expression. */
14773 if (cond_or == DOM_CC_NX_OR_Y)
14774 cond1 = reverse_condition (cond1);
14776 /* If the comparisons are not equal, and one doesn't dominate the other,
14777 then we can't do this. */
14778 if (cond1 != cond2
14779 && !comparison_dominates_p (cond1, cond2)
14780 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14781 return CCmode;
14783 if (swapped)
14785 enum rtx_code temp = cond1;
14786 cond1 = cond2;
14787 cond2 = temp;
14790 switch (cond1)
14792 case EQ:
14793 if (cond_or == DOM_CC_X_AND_Y)
14794 return CC_DEQmode;
14796 switch (cond2)
14798 case EQ: return CC_DEQmode;
14799 case LE: return CC_DLEmode;
14800 case LEU: return CC_DLEUmode;
14801 case GE: return CC_DGEmode;
14802 case GEU: return CC_DGEUmode;
14803 default: gcc_unreachable ();
14806 case LT:
14807 if (cond_or == DOM_CC_X_AND_Y)
14808 return CC_DLTmode;
14810 switch (cond2)
14812 case LT:
14813 return CC_DLTmode;
14814 case LE:
14815 return CC_DLEmode;
14816 case NE:
14817 return CC_DNEmode;
14818 default:
14819 gcc_unreachable ();
14822 case GT:
14823 if (cond_or == DOM_CC_X_AND_Y)
14824 return CC_DGTmode;
14826 switch (cond2)
14828 case GT:
14829 return CC_DGTmode;
14830 case GE:
14831 return CC_DGEmode;
14832 case NE:
14833 return CC_DNEmode;
14834 default:
14835 gcc_unreachable ();
14838 case LTU:
14839 if (cond_or == DOM_CC_X_AND_Y)
14840 return CC_DLTUmode;
14842 switch (cond2)
14844 case LTU:
14845 return CC_DLTUmode;
14846 case LEU:
14847 return CC_DLEUmode;
14848 case NE:
14849 return CC_DNEmode;
14850 default:
14851 gcc_unreachable ();
14854 case GTU:
14855 if (cond_or == DOM_CC_X_AND_Y)
14856 return CC_DGTUmode;
14858 switch (cond2)
14860 case GTU:
14861 return CC_DGTUmode;
14862 case GEU:
14863 return CC_DGEUmode;
14864 case NE:
14865 return CC_DNEmode;
14866 default:
14867 gcc_unreachable ();
14870 /* The remaining cases only occur when both comparisons are the
14871 same. */
14872 case NE:
14873 gcc_assert (cond1 == cond2);
14874 return CC_DNEmode;
14876 case LE:
14877 gcc_assert (cond1 == cond2);
14878 return CC_DLEmode;
14880 case GE:
14881 gcc_assert (cond1 == cond2);
14882 return CC_DGEmode;
14884 case LEU:
14885 gcc_assert (cond1 == cond2);
14886 return CC_DLEUmode;
14888 case GEU:
14889 gcc_assert (cond1 == cond2);
14890 return CC_DGEUmode;
14892 default:
14893 gcc_unreachable ();
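/* Illustrative sketch, standalone and much simplified: the dominance idea
   used above.  Two conditions can be merged only when one implies the other;
   the merged CC mode then encodes the weaker condition for an OR and the
   stronger one for an AND.  Only a few signed codes are modelled and the
   names below are local to this sketch.  */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Does cond1 being true imply cond2 is true?  (Tiny subset.)  */
static bool
dominates (const char *cond1, const char *cond2)
{
  if (strcmp (cond1, cond2) == 0)
    return true;
  if (strcmp (cond1, "EQ") == 0)
    return strcmp (cond2, "LE") == 0 || strcmp (cond2, "GE") == 0;
  if (strcmp (cond1, "LT") == 0)
    return strcmp (cond2, "LE") == 0;
  if (strcmp (cond1, "GT") == 0)
    return strcmp (cond2, "GE") == 0;
  return false;
}

int
main (void)
{
  /* (a == b) || (a <= b): EQ dominates LE, so the pair merges (CC_DLEmode).  */
  printf ("EQ || LE mergeable: %d\n", dominates ("EQ", "LE"));
  /* (a == b) || (a < b): neither dominates, so CCmode is returned (reject).  */
  printf ("EQ || LT mergeable: %d\n",
          dominates ("EQ", "LT") || dominates ("LT", "EQ"));
  return 0;
}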
14897 enum machine_mode
14898 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14900 /* All floating point compares return CCFP if it is an equality
14901 comparison, and CCFPE otherwise. */
14902 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14904 switch (op)
14906 case EQ:
14907 case NE:
14908 case UNORDERED:
14909 case ORDERED:
14910 case UNLT:
14911 case UNLE:
14912 case UNGT:
14913 case UNGE:
14914 case UNEQ:
14915 case LTGT:
14916 return CCFPmode;
14918 case LT:
14919 case LE:
14920 case GT:
14921 case GE:
14922 return CCFPEmode;
14924 default:
14925 gcc_unreachable ();
14929 /* A compare with a shifted operand. Because of canonicalization, the
14930 comparison will have to be swapped when we emit the assembler. */
14931 if (GET_MODE (y) == SImode
14932 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14933 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14934 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14935 || GET_CODE (x) == ROTATERT))
14936 return CC_SWPmode;
14938 /* This operation is performed swapped, but since we only rely on the Z
14939 flag we don't need an additional mode. */
14940 if (GET_MODE (y) == SImode
14941 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14942 && GET_CODE (x) == NEG
14943 && (op == EQ || op == NE))
14944 return CC_Zmode;
14946 /* This is a special case that is used by combine to allow a
14947 comparison of a shifted byte load to be split into a zero-extend
14948 followed by a comparison of the shifted integer (only valid for
14949 equalities and unsigned inequalities). */
14950 if (GET_MODE (x) == SImode
14951 && GET_CODE (x) == ASHIFT
14952 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14953 && GET_CODE (XEXP (x, 0)) == SUBREG
14954 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14955 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14956 && (op == EQ || op == NE
14957 || op == GEU || op == GTU || op == LTU || op == LEU)
14958 && CONST_INT_P (y))
14959 return CC_Zmode;
14961 /* A construct for a conditional compare, if the false arm contains
14962 0, then both conditions must be true, otherwise either condition
14963 must be true. Not all conditions are possible, so CCmode is
14964 returned if it can't be done. */
14965 if (GET_CODE (x) == IF_THEN_ELSE
14966 && (XEXP (x, 2) == const0_rtx
14967 || XEXP (x, 2) == const1_rtx)
14968 && COMPARISON_P (XEXP (x, 0))
14969 && COMPARISON_P (XEXP (x, 1)))
14970 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14971 INTVAL (XEXP (x, 2)));
14973 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14974 if (GET_CODE (x) == AND
14975 && (op == EQ || op == NE)
14976 && COMPARISON_P (XEXP (x, 0))
14977 && COMPARISON_P (XEXP (x, 1)))
14978 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14979 DOM_CC_X_AND_Y);
14981 if (GET_CODE (x) == IOR
14982 && (op == EQ || op == NE)
14983 && COMPARISON_P (XEXP (x, 0))
14984 && COMPARISON_P (XEXP (x, 1)))
14985 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14986 DOM_CC_X_OR_Y);
14988 /* An operation (on Thumb) where we want to test for a single bit.
14989 This is done by shifting that bit up into the top bit of a
14990 scratch register; we can then branch on the sign bit. */
14991 if (TARGET_THUMB1
14992 && GET_MODE (x) == SImode
14993 && (op == EQ || op == NE)
14994 && GET_CODE (x) == ZERO_EXTRACT
14995 && XEXP (x, 1) == const1_rtx)
14996 return CC_Nmode;
14998 /* An operation that sets the condition codes as a side-effect, the
14999 V flag is not set correctly, so we can only use comparisons where
15000 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15001 instead.) */
15002 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15003 if (GET_MODE (x) == SImode
15004 && y == const0_rtx
15005 && (op == EQ || op == NE || op == LT || op == GE)
15006 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15007 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15008 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15009 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15010 || GET_CODE (x) == LSHIFTRT
15011 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15012 || GET_CODE (x) == ROTATERT
15013 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15014 return CC_NOOVmode;
15016 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15017 return CC_Zmode;
15019 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15020 && GET_CODE (x) == PLUS
15021 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15022 return CC_Cmode;
15024 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15026 switch (op)
15028 case EQ:
15029 case NE:
15030 /* A DImode comparison against zero can be implemented by
15031 or'ing the two halves together. */
15032 if (y == const0_rtx)
15033 return CC_Zmode;
15035 /* We can do an equality test in three Thumb instructions. */
15036 if (!TARGET_32BIT)
15037 return CC_Zmode;
15039 /* FALLTHROUGH */
15041 case LTU:
15042 case LEU:
15043 case GTU:
15044 case GEU:
15045 /* DImode unsigned comparisons can be implemented by cmp +
15046 cmpeq without a scratch register. Not worth doing in
15047 Thumb-2. */
15048 if (TARGET_32BIT)
15049 return CC_CZmode;
15051 /* FALLTHROUGH */
15053 case LT:
15054 case LE:
15055 case GT:
15056 case GE:
15057 /* DImode signed and unsigned comparisons can be implemented
15058 by cmp + sbcs with a scratch register, but that does not
15059 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15060 gcc_assert (op != EQ && op != NE);
15061 return CC_NCVmode;
15063 default:
15064 gcc_unreachable ();
15068 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15069 return GET_MODE (x);
15071 return CCmode;
15074 /* X and Y are two things to compare using CODE. Emit the compare insn and
15075 return the rtx for register 0 in the proper mode. FP means this is a
15076 floating point compare: I don't think that it is needed on the arm. */
15077 rtx
15078 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15080 enum machine_mode mode;
15081 rtx cc_reg;
15082 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15084 /* We might have X as a constant, Y as a register because of the predicates
15085 used for cmpdi. If so, force X to a register here. */
15086 if (dimode_comparison && !REG_P (x))
15087 x = force_reg (DImode, x);
15089 mode = SELECT_CC_MODE (code, x, y);
15090 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15092 if (dimode_comparison
15093 && mode != CC_CZmode)
15095 rtx clobber, set;
15097 /* To compare two non-zero values for equality, XOR them and
15098 then compare against zero. Not used for ARM mode; there
15099 CC_CZmode is cheaper. */
15100 if (mode == CC_Zmode && y != const0_rtx)
15102 gcc_assert (!reload_completed);
15103 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15104 y = const0_rtx;
15107 /* A scratch register is required. */
15108 if (reload_completed)
15109 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15110 else
15111 scratch = gen_rtx_SCRATCH (SImode);
15113 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15114 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15115 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15117 else
15118 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15120 return cc_reg;
15123 /* Generate a sequence of insns that will generate the correct return
15124 address mask depending on the physical architecture that the program
15125 is running on. */
15126 rtx
15127 arm_gen_return_addr_mask (void)
15129 rtx reg = gen_reg_rtx (Pmode);
15131 emit_insn (gen_return_addr_mask (reg));
15132 return reg;
15135 void
15136 arm_reload_in_hi (rtx *operands)
15138 rtx ref = operands[1];
15139 rtx base, scratch;
15140 HOST_WIDE_INT offset = 0;
15142 if (GET_CODE (ref) == SUBREG)
15144 offset = SUBREG_BYTE (ref);
15145 ref = SUBREG_REG (ref);
15148 if (REG_P (ref))
15150 /* We have a pseudo which has been spilt onto the stack; there
15151 are two cases here: the first where there is a simple
15152 stack-slot replacement and a second where the stack-slot is
15153 out of range, or is used as a subreg. */
15154 if (reg_equiv_mem (REGNO (ref)))
15156 ref = reg_equiv_mem (REGNO (ref));
15157 base = find_replacement (&XEXP (ref, 0));
15159 else
15160 /* The slot is out of range, or was dressed up in a SUBREG. */
15161 base = reg_equiv_address (REGNO (ref));
15163 else
15164 base = find_replacement (&XEXP (ref, 0));
15166 /* Handle the case where the address is too complex to be offset by 1. */
15167 if (GET_CODE (base) == MINUS
15168 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15170 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15172 emit_set_insn (base_plus, base);
15173 base = base_plus;
15175 else if (GET_CODE (base) == PLUS)
15177 /* The addend must be CONST_INT, or we would have dealt with it above. */
15178 HOST_WIDE_INT hi, lo;
15180 offset += INTVAL (XEXP (base, 1));
15181 base = XEXP (base, 0);
15183 /* Rework the address into a legal sequence of insns. */
15184 /* Valid range for lo is -4095 -> 4095 */
15185 lo = (offset >= 0
15186 ? (offset & 0xfff)
15187 : -((-offset) & 0xfff));
15189 /* Corner case, if lo is the max offset then we would be out of range
15190 once we have added the additional 1 below, so bump the msb into the
15191 pre-loading insn(s). */
15192 if (lo == 4095)
15193 lo &= 0x7ff;
15195 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15196 ^ (HOST_WIDE_INT) 0x80000000)
15197 - (HOST_WIDE_INT) 0x80000000);
15199 gcc_assert (hi + lo == offset);
15201 if (hi != 0)
15203 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15205 /* Get the base address; addsi3 knows how to handle constants
15206 that require more than one insn. */
15207 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15208 base = base_plus;
15209 offset = lo;
15213 /* Operands[2] may overlap operands[0] (though it won't overlap
15214 operands[1]), that's why we asked for a DImode reg -- so we can
15215 use the bit that does not overlap. */
15216 if (REGNO (operands[2]) == REGNO (operands[0]))
15217 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15218 else
15219 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15221 emit_insn (gen_zero_extendqisi2 (scratch,
15222 gen_rtx_MEM (QImode,
15223 plus_constant (Pmode, base,
15224 offset))));
15225 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15226 gen_rtx_MEM (QImode,
15227 plus_constant (Pmode, base,
15228 offset + 1))));
15229 if (!BYTES_BIG_ENDIAN)
15230 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15231 gen_rtx_IOR (SImode,
15232 gen_rtx_ASHIFT
15233 (SImode,
15234 gen_rtx_SUBREG (SImode, operands[0], 0),
15235 GEN_INT (8)),
15236 scratch));
15237 else
15238 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15239 gen_rtx_IOR (SImode,
15240 gen_rtx_ASHIFT (SImode, scratch,
15241 GEN_INT (8)),
15242 gen_rtx_SUBREG (SImode, operands[0], 0)));
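/* Illustrative sketch, standalone: the lo/hi split used above.  lo keeps the
   low 12 bits (negated towards zero for negative offsets), hi is the rest,
   and lo is trimmed when it is exactly 4095 so that the +1 for the second
   byte cannot push the address out of range.  */
#include <stdio.h>

int
main (void)
{
  long long offsets[] = { 4200, -4200, 4095 };
  for (int i = 0; i < 3; i++)
    {
      long long offset = offsets[i];
      long long lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
      if (lo == 4095)
        lo &= 0x7ff;
      long long hi = (((offset - lo) & 0xffffffffLL) ^ 0x80000000LL)
                     - 0x80000000LL;
      printf ("offset %lld -> hi %lld + lo %lld\n", offset, hi, lo);
    }
  /* 4200 -> 4096 + 104;  -4200 -> -4096 + -104;  4095 -> 2048 + 2047  */
  return 0;
}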
15245 /* Handle storing a half-word to memory during reload by synthesizing as two
15246 byte stores. Take care not to clobber the input values until after we
15247 have moved them somewhere safe. This code assumes that if the DImode
15248 scratch in operands[2] overlaps either the input value or output address
15249 in some way, then that value must die in this insn (we absolutely need
15250 two scratch registers for some corner cases). */
15251 void
15252 arm_reload_out_hi (rtx *operands)
15254 rtx ref = operands[0];
15255 rtx outval = operands[1];
15256 rtx base, scratch;
15257 HOST_WIDE_INT offset = 0;
15259 if (GET_CODE (ref) == SUBREG)
15261 offset = SUBREG_BYTE (ref);
15262 ref = SUBREG_REG (ref);
15265 if (REG_P (ref))
15267 /* We have a pseudo which has been spilt onto the stack; there
15268 are two cases here: the first where there is a simple
15269 stack-slot replacement and a second where the stack-slot is
15270 out of range, or is used as a subreg. */
15271 if (reg_equiv_mem (REGNO (ref)))
15273 ref = reg_equiv_mem (REGNO (ref));
15274 base = find_replacement (&XEXP (ref, 0));
15276 else
15277 /* The slot is out of range, or was dressed up in a SUBREG. */
15278 base = reg_equiv_address (REGNO (ref));
15280 else
15281 base = find_replacement (&XEXP (ref, 0));
15283 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15285 /* Handle the case where the address is too complex to be offset by 1. */
15286 if (GET_CODE (base) == MINUS
15287 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15289 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15291 /* Be careful not to destroy OUTVAL. */
15292 if (reg_overlap_mentioned_p (base_plus, outval))
15294 /* Updating base_plus might destroy outval, see if we can
15295 swap the scratch and base_plus. */
15296 if (!reg_overlap_mentioned_p (scratch, outval))
15298 rtx tmp = scratch;
15299 scratch = base_plus;
15300 base_plus = tmp;
15302 else
15304 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15306 /* Be conservative and copy OUTVAL into the scratch now,
15307 this should only be necessary if outval is a subreg
15308 of something larger than a word. */
15309 /* XXX Might this clobber base? I can't see how it can,
15310 since scratch is known to overlap with OUTVAL, and
15311 must be wider than a word. */
15312 emit_insn (gen_movhi (scratch_hi, outval));
15313 outval = scratch_hi;
15317 emit_set_insn (base_plus, base);
15318 base = base_plus;
15320 else if (GET_CODE (base) == PLUS)
15322 /* The addend must be CONST_INT, or we would have dealt with it above. */
15323 HOST_WIDE_INT hi, lo;
15325 offset += INTVAL (XEXP (base, 1));
15326 base = XEXP (base, 0);
15328 /* Rework the address into a legal sequence of insns. */
15329 /* Valid range for lo is -4095 -> 4095 */
15330 lo = (offset >= 0
15331 ? (offset & 0xfff)
15332 : -((-offset) & 0xfff));
15334 /* Corner case, if lo is the max offset then we would be out of range
15335 once we have added the additional 1 below, so bump the msb into the
15336 pre-loading insn(s). */
15337 if (lo == 4095)
15338 lo &= 0x7ff;
15340 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15341 ^ (HOST_WIDE_INT) 0x80000000)
15342 - (HOST_WIDE_INT) 0x80000000);
15344 gcc_assert (hi + lo == offset);
15346 if (hi != 0)
15348 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15350 /* Be careful not to destroy OUTVAL. */
15351 if (reg_overlap_mentioned_p (base_plus, outval))
15353 /* Updating base_plus might destroy outval, see if we
15354 can swap the scratch and base_plus. */
15355 if (!reg_overlap_mentioned_p (scratch, outval))
15357 rtx tmp = scratch;
15358 scratch = base_plus;
15359 base_plus = tmp;
15361 else
15363 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15365 /* Be conservative and copy outval into scratch now,
15366 this should only be necessary if outval is a
15367 subreg of something larger than a word. */
15368 /* XXX Might this clobber base? I can't see how it
15369 can, since scratch is known to overlap with
15370 outval. */
15371 emit_insn (gen_movhi (scratch_hi, outval));
15372 outval = scratch_hi;
15376 /* Get the base address; addsi3 knows how to handle constants
15377 that require more than one insn. */
15378 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15379 base = base_plus;
15380 offset = lo;
15384 if (BYTES_BIG_ENDIAN)
15386 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15387 plus_constant (Pmode, base,
15388 offset + 1)),
15389 gen_lowpart (QImode, outval)));
15390 emit_insn (gen_lshrsi3 (scratch,
15391 gen_rtx_SUBREG (SImode, outval, 0),
15392 GEN_INT (8)));
15393 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15394 offset)),
15395 gen_lowpart (QImode, scratch)));
15397 else
15399 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15400 offset)),
15401 gen_lowpart (QImode, outval)));
15402 emit_insn (gen_lshrsi3 (scratch,
15403 gen_rtx_SUBREG (SImode, outval, 0),
15404 GEN_INT (8)));
15405 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15406 plus_constant (Pmode, base,
15407 offset + 1)),
15408 gen_lowpart (QImode, scratch)));
15412 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15413 (padded to the size of a word) should be passed in a register. */
15415 static bool
15416 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15418 if (TARGET_AAPCS_BASED)
15419 return must_pass_in_stack_var_size (mode, type);
15420 else
15421 return must_pass_in_stack_var_size_or_pad (mode, type);
15425 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15426 Return true if an argument passed on the stack should be padded upwards,
15427 i.e. if the least-significant byte has useful data.
15428 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15429 aggregate types are placed in the lowest memory address. */
15431 bool
15432 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15434 if (!TARGET_AAPCS_BASED)
15435 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15437 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15438 return false;
15440 return true;
15444 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15445 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15446 register has useful data, and return the opposite if the most
15447 significant byte does. */
15449 bool
15450 arm_pad_reg_upward (enum machine_mode mode,
15451 tree type, int first ATTRIBUTE_UNUSED)
15453 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15455 /* For AAPCS, small aggregates, small fixed-point types,
15456 and small complex types are always padded upwards. */
15457 if (type)
15459 if ((AGGREGATE_TYPE_P (type)
15460 || TREE_CODE (type) == COMPLEX_TYPE
15461 || FIXED_POINT_TYPE_P (type))
15462 && int_size_in_bytes (type) <= 4)
15463 return true;
15465 else
15467 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15468 && GET_MODE_SIZE (mode) <= 4)
15469 return true;
15473 /* Otherwise, use default padding. */
15474 return !BYTES_BIG_ENDIAN;
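/* Concretely (illustrative): on a big-endian AAPCS target a 3-byte
   structure held in a register occupies the most significant bytes of
   that register, so a store puts its first byte at the lowest address,
   while a 'short' falls through to the default and lives in the least
   significant bits like any other widened integer.  */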
15477 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15478 assuming that the address in the base register is word aligned. */
15479 bool
15480 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15482 HOST_WIDE_INT max_offset;
15484 /* Offset must be a multiple of 4 in Thumb mode. */
15485 if (TARGET_THUMB2 && ((offset & 3) != 0))
15486 return false;
15488 if (TARGET_THUMB2)
15489 max_offset = 1020;
15490 else if (TARGET_ARM)
15491 max_offset = 255;
15492 else
15493 return false;
15495 return ((offset <= max_offset) && (offset >= -max_offset));
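/* For example, an offset of 256 is accepted for Thumb-2 (a multiple of 4
   within +/-1020) but rejected for ARM-mode LDRD/STRD, whose immediate
   is limited to +/-255; conversely, 254 is fine in ARM mode but rejected
   for Thumb-2 because it is not word-aligned.  */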
15498 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15499 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15500 Assumes that the address in the base register RN is word aligned. Pattern
15501 guarantees that both memory accesses use the same base register,
15502 the offsets are constants within the range, and the gap between the offsets is 4.
15503 If reload is complete then check that the registers are legal. WBACK indicates whether
15504 address is updated. LOAD indicates whether memory access is load or store. */
15505 bool
15506 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15507 bool wback, bool load)
15509 unsigned int t, t2, n;
15511 if (!reload_completed)
15512 return true;
15514 if (!offset_ok_for_ldrd_strd (offset))
15515 return false;
15517 t = REGNO (rt);
15518 t2 = REGNO (rt2);
15519 n = REGNO (rn);
15521 if ((TARGET_THUMB2)
15522 && ((wback && (n == t || n == t2))
15523 || (t == SP_REGNUM)
15524 || (t == PC_REGNUM)
15525 || (t2 == SP_REGNUM)
15526 || (t2 == PC_REGNUM)
15527 || (!load && (n == PC_REGNUM))
15528 || (load && (t == t2))
15529 /* Triggers Cortex-M3 LDRD errata. */
15530 || (!wback && load && fix_cm3_ldrd && (n == t))))
15531 return false;
15533 if ((TARGET_ARM)
15534 && ((wback && (n == t || n == t2))
15535 || (t2 == PC_REGNUM)
15536 || (t % 2 != 0) /* First destination register is not even. */
15537 || (t2 != t + 1)
15538 /* PC can be used as base register (for offset addressing only),
15539 but it is deprecated. */
15540 || (n == PC_REGNUM)))
15541 return false;
15543 return true;
15546 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15547 operand MEM's address contains an immediate offset from the base
15548 register and has no side effects, in which case it sets BASE and
15549 OFFSET accordingly. */
15550 static bool
15551 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15553 rtx addr;
15555 gcc_assert (base != NULL && offset != NULL);
15557 /* TODO: Handle more general memory operand patterns, such as
15558 PRE_DEC and PRE_INC. */
15560 if (side_effects_p (mem))
15561 return false;
15563 /* Can't deal with subregs. */
15564 if (GET_CODE (mem) == SUBREG)
15565 return false;
15567 gcc_assert (MEM_P (mem));
15569 *offset = const0_rtx;
15571 addr = XEXP (mem, 0);
15573 /* If addr isn't valid for DImode, then we can't handle it. */
15574 if (!arm_legitimate_address_p (DImode, addr,
15575 reload_in_progress || reload_completed))
15576 return false;
15578 if (REG_P (addr))
15580 *base = addr;
15581 return true;
15583 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15585 *base = XEXP (addr, 0);
15586 *offset = XEXP (addr, 1);
15587 return (REG_P (*base) && CONST_INT_P (*offset));
15590 return false;
15593 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15595 /* Called from a peephole2 to replace two word-size accesses with a
15596 single LDRD/STRD instruction. Returns true iff we can generate a
15597 new instruction sequence. That is, both accesses use the same base
15598 register and the gap between constant offsets is 4. This function
15599 may reorder its operands to match ldrd/strd RTL templates.
15600 OPERANDS are the operands found by the peephole matcher;
15601 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15602 corresponding memory operands. LOAD indicates whether the access
15603 is load or store. CONST_STORE indicates a store of constant
15604 integer values held in OPERANDS[4,5] and assumes that the pattern
15605 is 4 insns long, for the purpose of checking dead registers.
15606 COMMUTE indicates that register operands may be reordered. */
15607 bool
15608 gen_operands_ldrd_strd (rtx *operands, bool load,
15609 bool const_store, bool commute)
15611 int nops = 2;
15612 HOST_WIDE_INT offsets[2], offset;
15613 rtx base = NULL_RTX;
15614 rtx cur_base, cur_offset, tmp;
15615 int i, gap;
15616 HARD_REG_SET regset;
15618 gcc_assert (!const_store || !load);
15619 /* Check that the memory references are immediate offsets from the
15620 same base register. Extract the base register, the destination
15621 registers, and the corresponding memory offsets. */
15622 for (i = 0; i < nops; i++)
15624 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15625 return false;
15627 if (i == 0)
15628 base = cur_base;
15629 else if (REGNO (base) != REGNO (cur_base))
15630 return false;
15632 offsets[i] = INTVAL (cur_offset);
15633 if (GET_CODE (operands[i]) == SUBREG)
15635 tmp = SUBREG_REG (operands[i]);
15636 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15637 operands[i] = tmp;
15641 /* Make sure there is no dependency between the individual loads. */
15642 if (load && REGNO (operands[0]) == REGNO (base))
15643 return false; /* RAW */
15645 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15646 return false; /* WAW */
15648 /* If the same input register is used in both stores
15649 when storing different constants, try to find a free register.
15650 For example, the code
15651 mov r0, 0
15652 str r0, [r2]
15653 mov r0, 1
15654 str r0, [r2, #4]
15655 can be transformed into
15656 mov r1, 0
15657 strd r1, r0, [r2]
15658 in Thumb mode assuming that r1 is free. */
15659 if (const_store
15660 && REGNO (operands[0]) == REGNO (operands[1])
15661 && INTVAL (operands[4]) != INTVAL (operands[5]))
15663 if (TARGET_THUMB2)
15665 CLEAR_HARD_REG_SET (regset);
15666 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15667 if (tmp == NULL_RTX)
15668 return false;
15670 /* Use the new register in the first load to ensure that
15671 if the original input register is not dead after peephole,
15672 then it will have the correct constant value. */
15673 operands[0] = tmp;
15675 else if (TARGET_ARM)
15677 return false;
15678 int regno = REGNO (operands[0]);
15679 if (!peep2_reg_dead_p (4, operands[0]))
15681 /* When the input register is even and is not dead after the
15682 pattern, it has to hold the second constant but we cannot
15683 form a legal STRD in ARM mode with this register as the second
15684 register. */
15685 if (regno % 2 == 0)
15686 return false;
15688 /* Is regno-1 free? */
15689 SET_HARD_REG_SET (regset);
15690 CLEAR_HARD_REG_BIT(regset, regno - 1);
15691 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15692 if (tmp == NULL_RTX)
15693 return false;
15695 operands[0] = tmp;
15697 else
15699 /* Find a DImode register. */
15700 CLEAR_HARD_REG_SET (regset);
15701 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15702 if (tmp != NULL_RTX)
15704 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15705 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15707 else
15709 /* Can we use the input register to form a DI register? */
15710 SET_HARD_REG_SET (regset);
15711 CLEAR_HARD_REG_BIT(regset,
15712 regno % 2 == 0 ? regno + 1 : regno - 1);
15713 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15714 if (tmp == NULL_RTX)
15715 return false;
15716 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15720 gcc_assert (operands[0] != NULL_RTX);
15721 gcc_assert (operands[1] != NULL_RTX);
15722 gcc_assert (REGNO (operands[0]) % 2 == 0);
15723 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15727 /* Make sure the instructions are ordered with lower memory access first. */
15728 if (offsets[0] > offsets[1])
15730 gap = offsets[0] - offsets[1];
15731 offset = offsets[1];
15733 /* Swap the instructions such that lower memory is accessed first. */
15734 SWAP_RTX (operands[0], operands[1]);
15735 SWAP_RTX (operands[2], operands[3]);
15736 if (const_store)
15737 SWAP_RTX (operands[4], operands[5]);
15739 else
15741 gap = offsets[1] - offsets[0];
15742 offset = offsets[0];
15745 /* Make sure accesses are to consecutive memory locations. */
15746 if (gap != 4)
15747 return false;
15749 /* Make sure we generate legal instructions. */
15750 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15751 false, load))
15752 return true;
15754 /* In Thumb state, where registers are almost unconstrained, there
15755 is little hope to fix it. */
15756 if (TARGET_THUMB2)
15757 return false;
15759 if (load && commute)
15761 /* Try reordering registers. */
15762 SWAP_RTX (operands[0], operands[1]);
15763 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15764 false, load))
15765 return true;
15768 if (const_store)
15770 /* If input registers are dead after this pattern, they can be
15771 reordered or replaced by other registers that are free in the
15772 current pattern. */
15773 if (!peep2_reg_dead_p (4, operands[0])
15774 || !peep2_reg_dead_p (4, operands[1]))
15775 return false;
15777 /* Try to reorder the input registers. */
15778 /* For example, the code
15779 mov r0, 0
15780 mov r1, 1
15781 str r1, [r2]
15782 str r0, [r2, #4]
15783 can be transformed into
15784 mov r1, 0
15785 mov r0, 1
15786 strd r0, [r2]
15788 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15789 false, false))
15791 SWAP_RTX (operands[0], operands[1]);
15792 return true;
15795 /* Try to find a free DI register. */
15796 CLEAR_HARD_REG_SET (regset);
15797 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15798 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15799 while (true)
15801 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15802 if (tmp == NULL_RTX)
15803 return false;
15805 /* DREG must be an even-numbered register in DImode.
15806 Split it into SI registers. */
15807 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15808 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15809 gcc_assert (operands[0] != NULL_RTX);
15810 gcc_assert (operands[1] != NULL_RTX);
15811 gcc_assert (REGNO (operands[0]) % 2 == 0);
15812 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15814 return (operands_ok_ldrd_strd (operands[0], operands[1],
15815 base, offset,
15816 false, load));
15820 return false;
15822 #undef SWAP_RTX
15827 /* Print a symbolic form of X to the debug file, F. */
15828 static void
15829 arm_print_value (FILE *f, rtx x)
15831 switch (GET_CODE (x))
15833 case CONST_INT:
15834 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15835 return;
15837 case CONST_DOUBLE:
15838 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15839 return;
15841 case CONST_VECTOR:
15843 int i;
15845 fprintf (f, "<");
15846 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15848 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15849 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15850 fputc (',', f);
15852 fprintf (f, ">");
15854 return;
15856 case CONST_STRING:
15857 fprintf (f, "\"%s\"", XSTR (x, 0));
15858 return;
15860 case SYMBOL_REF:
15861 fprintf (f, "`%s'", XSTR (x, 0));
15862 return;
15864 case LABEL_REF:
15865 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15866 return;
15868 case CONST:
15869 arm_print_value (f, XEXP (x, 0));
15870 return;
15872 case PLUS:
15873 arm_print_value (f, XEXP (x, 0));
15874 fprintf (f, "+");
15875 arm_print_value (f, XEXP (x, 1));
15876 return;
15878 case PC:
15879 fprintf (f, "pc");
15880 return;
15882 default:
15883 fprintf (f, "????");
15884 return;
15888 /* Routines for manipulation of the constant pool. */
15890 /* Arm instructions cannot load a large constant directly into a
15891 register; they have to come from a pc relative load. The constant
15892 must therefore be placed in the addressable range of the pc
15893 relative load. Depending on the precise pc relative load
15894 instruction the range is somewhere between 256 bytes and 4k. This
15895 means that we often have to dump a constant inside a function, and
15896 generate code to branch around it.
15898 It is important to minimize this, since the branches will slow
15899 things down and make the code larger.
15901 Normally we can hide the table after an existing unconditional
15902 branch so that there is no interruption of the flow, but in the
15903 worst case the code looks like this:
15905 ldr rn, L1
15907 b L2
15908 align
15909 L1: .long value
15913 ldr rn, L3
15915 b L4
15916 align
15917 L3: .long value
15921 We fix this by performing a scan after scheduling, which notices
15922 which instructions need to have their operands fetched from the
15923 constant table and builds the table.
15925 The algorithm starts by building a table of all the constants that
15926 need fixing up and all the natural barriers in the function (places
15927 where a constant table can be dropped without breaking the flow).
15928 For each fixup we note how far the pc-relative replacement will be
15929 able to reach and the offset of the instruction into the function.
15931 Having built the table we then group the fixes together to form
15932 tables that are as large as possible (subject to addressing
15933 constraints) and emit each table of constants after the last
15934 barrier that is within range of all the instructions in the group.
15935 If a group does not contain a barrier, then we forcibly create one
15936 by inserting a jump instruction into the flow. Once the table has
15937 been inserted, the insns are then modified to reference the
15938 relevant entry in the pool.
15940 Possible enhancements to the algorithm (not implemented) are:
15942 1) For some processors and object formats, there may be benefit in
15943 aligning the pools to the start of cache lines; this alignment
15944 would need to be taken into account when calculating addressability
15945 of a pool. */
15947 /* These typedefs are located at the start of this file, so that
15948 they can be used in the prototypes there. This comment is to
15949 remind readers of that fact so that the following structures
15950 can be understood more easily.
15952 typedef struct minipool_node Mnode;
15953 typedef struct minipool_fixup Mfix; */
15955 struct minipool_node
15957 /* Doubly linked chain of entries. */
15958 Mnode * next;
15959 Mnode * prev;
15960 /* The maximum offset into the code at which this entry can be placed. While
15961 pushing fixes for forward references, all entries are sorted in order
15962 of increasing max_address. */
15963 HOST_WIDE_INT max_address;
15964 /* Similarly for an entry inserted for a backwards ref. */
15965 HOST_WIDE_INT min_address;
15966 /* The number of fixes referencing this entry. This can become zero
15967 if we "unpush" an entry. In this case we ignore the entry when we
15968 come to emit the code. */
15969 int refcount;
15970 /* The offset from the start of the minipool. */
15971 HOST_WIDE_INT offset;
15972 /* The value in table. */
15973 rtx value;
15974 /* The mode of value. */
15975 enum machine_mode mode;
15976 /* The size of the value. With iWMMXt enabled
15977 sizes > 4 also imply an alignment of 8 bytes. */
15978 int fix_size;
15981 struct minipool_fixup
15983 Mfix * next;
15984 rtx insn;
15985 HOST_WIDE_INT address;
15986 rtx * loc;
15987 enum machine_mode mode;
15988 int fix_size;
15989 rtx value;
15990 Mnode * minipool;
15991 HOST_WIDE_INT forwards;
15992 HOST_WIDE_INT backwards;
15995 /* Fixes less than a word need padding out to a word boundary. */
15996 #define MINIPOOL_FIX_SIZE(mode) \
15997 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
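/* E.g. an HImode constant (2 bytes) still occupies a 4-byte slot in the
   pool, while DImode or vector constants keep their natural size.  */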
15999 static Mnode * minipool_vector_head;
16000 static Mnode * minipool_vector_tail;
16001 static rtx minipool_vector_label;
16002 static int minipool_pad;
16004 /* The linked list of all minipool fixes required for this function. */
16005 Mfix * minipool_fix_head;
16006 Mfix * minipool_fix_tail;
16007 /* The fix entry for the current minipool, once it has been placed. */
16008 Mfix * minipool_barrier;
16010 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16011 #define JUMP_TABLES_IN_TEXT_SECTION 0
16012 #endif
16014 static HOST_WIDE_INT
16015 get_jump_table_size (rtx insn)
16017 /* ADDR_VECs only take room if read-only data goes into the text
16018 section. */
16019 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16021 rtx body = PATTERN (insn);
16022 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16023 HOST_WIDE_INT size;
16024 HOST_WIDE_INT modesize;
16026 modesize = GET_MODE_SIZE (GET_MODE (body));
16027 size = modesize * XVECLEN (body, elt);
16028 switch (modesize)
16030 case 1:
16031 /* Round up size of TBB table to a halfword boundary. */
16032 size = (size + 1) & ~(HOST_WIDE_INT)1;
16033 break;
16034 case 2:
16035 /* No padding necessary for TBH. */
16036 break;
16037 case 4:
16038 /* Add two bytes for alignment on Thumb. */
16039 if (TARGET_THUMB)
16040 size += 2;
16041 break;
16042 default:
16043 gcc_unreachable ();
16045 return size;
16048 return 0;
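/* Illustration: a TBB-style ADDR_DIFF_VEC with 5 QImode entries occupies
   5 bytes and is rounded up to 6, while a 5-entry SImode table takes 20
   bytes plus 2 bytes of alignment padding when compiling for Thumb.  */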
16051 /* Return the maximum amount of padding that will be inserted before
16052 label LABEL. */
16054 static HOST_WIDE_INT
16055 get_label_padding (rtx label)
16057 HOST_WIDE_INT align, min_insn_size;
16059 align = 1 << label_to_alignment (label);
16060 min_insn_size = TARGET_THUMB ? 2 : 4;
16061 return align > min_insn_size ? align - min_insn_size : 0;
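/* E.g. a label aligned to 8 bytes can be preceded by up to 6 bytes of
   padding in Thumb state (minimum insn size 2) and up to 4 bytes in ARM
   state (minimum insn size 4).  */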
16064 /* Move a minipool fix MP from its current location to before MAX_MP.
16065 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16066 constraints may need updating. */
16067 static Mnode *
16068 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16069 HOST_WIDE_INT max_address)
16071 /* The code below assumes these are different. */
16072 gcc_assert (mp != max_mp);
16074 if (max_mp == NULL)
16076 if (max_address < mp->max_address)
16077 mp->max_address = max_address;
16079 else
16081 if (max_address > max_mp->max_address - mp->fix_size)
16082 mp->max_address = max_mp->max_address - mp->fix_size;
16083 else
16084 mp->max_address = max_address;
16086 /* Unlink MP from its current position. Since max_mp is non-null,
16087 mp->prev must be non-null. */
16088 mp->prev->next = mp->next;
16089 if (mp->next != NULL)
16090 mp->next->prev = mp->prev;
16091 else
16092 minipool_vector_tail = mp->prev;
16094 /* Re-insert it before MAX_MP. */
16095 mp->next = max_mp;
16096 mp->prev = max_mp->prev;
16097 max_mp->prev = mp;
16099 if (mp->prev != NULL)
16100 mp->prev->next = mp;
16101 else
16102 minipool_vector_head = mp;
16105 /* Save the new entry. */
16106 max_mp = mp;
16108 /* Scan over the preceding entries and adjust their addresses as
16109 required. */
16110 while (mp->prev != NULL
16111 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16113 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16114 mp = mp->prev;
16117 return max_mp;
16120 /* Add a constant to the minipool for a forward reference. Returns the
16121 node added or NULL if the constant will not fit in this pool. */
16122 static Mnode *
16123 add_minipool_forward_ref (Mfix *fix)
16125 /* If set, max_mp is the first pool_entry that has a lower
16126 constraint than the one we are trying to add. */
16127 Mnode * max_mp = NULL;
16128 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16129 Mnode * mp;
16131 /* If the minipool starts before the end of FIX->INSN then this FIX
16132 can not be placed into the current pool. Furthermore, adding the
16133 new constant pool entry may cause the pool to start FIX_SIZE bytes
16134 earlier. */
16135 if (minipool_vector_head &&
16136 (fix->address + get_attr_length (fix->insn)
16137 >= minipool_vector_head->max_address - fix->fix_size))
16138 return NULL;
16140 /* Scan the pool to see if a constant with the same value has
16141 already been added. While we are doing this, also note the
16142 location where we must insert the constant if it doesn't already
16143 exist. */
16144 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16146 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16147 && fix->mode == mp->mode
16148 && (!LABEL_P (fix->value)
16149 || (CODE_LABEL_NUMBER (fix->value)
16150 == CODE_LABEL_NUMBER (mp->value)))
16151 && rtx_equal_p (fix->value, mp->value))
16153 /* More than one fix references this entry. */
16154 mp->refcount++;
16155 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16158 /* Note the insertion point if necessary. */
16159 if (max_mp == NULL
16160 && mp->max_address > max_address)
16161 max_mp = mp;
16163 /* If we are inserting an 8-byte aligned quantity and
16164 we have not already found an insertion point, then
16165 make sure that all such 8-byte aligned quantities are
16166 placed at the start of the pool. */
16167 if (ARM_DOUBLEWORD_ALIGN
16168 && max_mp == NULL
16169 && fix->fix_size >= 8
16170 && mp->fix_size < 8)
16172 max_mp = mp;
16173 max_address = mp->max_address;
16177 /* The value is not currently in the minipool, so we need to create
16178 a new entry for it. If MAX_MP is NULL, the entry will be put on
16179 the end of the list since the placement is less constrained than
16180 any existing entry. Otherwise, we insert the new fix before
16181 MAX_MP and, if necessary, adjust the constraints on the other
16182 entries. */
16183 mp = XNEW (Mnode);
16184 mp->fix_size = fix->fix_size;
16185 mp->mode = fix->mode;
16186 mp->value = fix->value;
16187 mp->refcount = 1;
16188 /* Not yet required for a backwards ref. */
16189 mp->min_address = -65536;
16191 if (max_mp == NULL)
16193 mp->max_address = max_address;
16194 mp->next = NULL;
16195 mp->prev = minipool_vector_tail;
16197 if (mp->prev == NULL)
16199 minipool_vector_head = mp;
16200 minipool_vector_label = gen_label_rtx ();
16202 else
16203 mp->prev->next = mp;
16205 minipool_vector_tail = mp;
16207 else
16209 if (max_address > max_mp->max_address - mp->fix_size)
16210 mp->max_address = max_mp->max_address - mp->fix_size;
16211 else
16212 mp->max_address = max_address;
16214 mp->next = max_mp;
16215 mp->prev = max_mp->prev;
16216 max_mp->prev = mp;
16217 if (mp->prev != NULL)
16218 mp->prev->next = mp;
16219 else
16220 minipool_vector_head = mp;
16223 /* Save the new entry. */
16224 max_mp = mp;
16226 /* Scan over the preceding entries and adjust their addresses as
16227 required. */
16228 while (mp->prev != NULL
16229 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16231 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16232 mp = mp->prev;
16235 return max_mp;
16238 static Mnode *
16239 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16240 HOST_WIDE_INT min_address)
16242 HOST_WIDE_INT offset;
16244 /* The code below assumes these are different. */
16245 gcc_assert (mp != min_mp);
16247 if (min_mp == NULL)
16249 if (min_address > mp->min_address)
16250 mp->min_address = min_address;
16252 else
16254 /* We will adjust this below if it is too loose. */
16255 mp->min_address = min_address;
16257 /* Unlink MP from its current position. Since min_mp is non-null,
16258 mp->next must be non-null. */
16259 mp->next->prev = mp->prev;
16260 if (mp->prev != NULL)
16261 mp->prev->next = mp->next;
16262 else
16263 minipool_vector_head = mp->next;
16265 /* Reinsert it after MIN_MP. */
16266 mp->prev = min_mp;
16267 mp->next = min_mp->next;
16268 min_mp->next = mp;
16269 if (mp->next != NULL)
16270 mp->next->prev = mp;
16271 else
16272 minipool_vector_tail = mp;
16275 min_mp = mp;
16277 offset = 0;
16278 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16280 mp->offset = offset;
16281 if (mp->refcount > 0)
16282 offset += mp->fix_size;
16284 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16285 mp->next->min_address = mp->min_address + mp->fix_size;
16288 return min_mp;
16291 /* Add a constant to the minipool for a backward reference. Returns the
16292 node added or NULL if the constant will not fit in this pool.
16294 Note that the code for insertion for a backwards reference can be
16295 somewhat confusing because the calculated offsets for each fix do
16296 not take into account the size of the pool (which is still under
16297 construction). */
16298 static Mnode *
16299 add_minipool_backward_ref (Mfix *fix)
16301 /* If set, min_mp is the last pool_entry that has a lower constraint
16302 than the one we are trying to add. */
16303 Mnode *min_mp = NULL;
16304 /* This can be negative, since it is only a constraint. */
16305 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16306 Mnode *mp;
16308 /* If we can't reach the current pool from this insn, or if we can't
16309 insert this entry at the end of the pool without pushing other
16310 fixes out of range, then we don't try. This ensures that we
16311 can't fail later on. */
16312 if (min_address >= minipool_barrier->address
16313 || (minipool_vector_tail->min_address + fix->fix_size
16314 >= minipool_barrier->address))
16315 return NULL;
16317 /* Scan the pool to see if a constant with the same value has
16318 already been added. While we are doing this, also note the
16319 location where we must insert the constant if it doesn't already
16320 exist. */
16321 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16323 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16324 && fix->mode == mp->mode
16325 && (!LABEL_P (fix->value)
16326 || (CODE_LABEL_NUMBER (fix->value)
16327 == CODE_LABEL_NUMBER (mp->value)))
16328 && rtx_equal_p (fix->value, mp->value)
16329 /* Check that there is enough slack to move this entry to the
16330 end of the table (this is conservative). */
16331 && (mp->max_address
16332 > (minipool_barrier->address
16333 + minipool_vector_tail->offset
16334 + minipool_vector_tail->fix_size)))
16336 mp->refcount++;
16337 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16340 if (min_mp != NULL)
16341 mp->min_address += fix->fix_size;
16342 else
16344 /* Note the insertion point if necessary. */
16345 if (mp->min_address < min_address)
16347 /* For now, we do not allow the insertion of 8-byte alignment
16348 requiring nodes anywhere but at the start of the pool. */
16349 if (ARM_DOUBLEWORD_ALIGN
16350 && fix->fix_size >= 8 && mp->fix_size < 8)
16351 return NULL;
16352 else
16353 min_mp = mp;
16355 else if (mp->max_address
16356 < minipool_barrier->address + mp->offset + fix->fix_size)
16358 /* Inserting before this entry would push the fix beyond
16359 its maximum address (which can happen if we have
16360 re-located a forwards fix); force the new fix to come
16361 after it. */
16362 if (ARM_DOUBLEWORD_ALIGN
16363 && fix->fix_size >= 8 && mp->fix_size < 8)
16364 return NULL;
16365 else
16367 min_mp = mp;
16368 min_address = mp->min_address + fix->fix_size;
16371 /* Do not insert a non-8-byte aligned quantity before 8-byte
16372 aligned quantities. */
16373 else if (ARM_DOUBLEWORD_ALIGN
16374 && fix->fix_size < 8
16375 && mp->fix_size >= 8)
16377 min_mp = mp;
16378 min_address = mp->min_address + fix->fix_size;
16383 /* We need to create a new entry. */
16384 mp = XNEW (Mnode);
16385 mp->fix_size = fix->fix_size;
16386 mp->mode = fix->mode;
16387 mp->value = fix->value;
16388 mp->refcount = 1;
16389 mp->max_address = minipool_barrier->address + 65536;
16391 mp->min_address = min_address;
16393 if (min_mp == NULL)
16395 mp->prev = NULL;
16396 mp->next = minipool_vector_head;
16398 if (mp->next == NULL)
16400 minipool_vector_tail = mp;
16401 minipool_vector_label = gen_label_rtx ();
16403 else
16404 mp->next->prev = mp;
16406 minipool_vector_head = mp;
16408 else
16410 mp->next = min_mp->next;
16411 mp->prev = min_mp;
16412 min_mp->next = mp;
16414 if (mp->next != NULL)
16415 mp->next->prev = mp;
16416 else
16417 minipool_vector_tail = mp;
16420 /* Save the new entry. */
16421 min_mp = mp;
16423 if (mp->prev)
16424 mp = mp->prev;
16425 else
16426 mp->offset = 0;
16428 /* Scan over the following entries and adjust their offsets. */
16429 while (mp->next != NULL)
16431 if (mp->next->min_address < mp->min_address + mp->fix_size)
16432 mp->next->min_address = mp->min_address + mp->fix_size;
16434 if (mp->refcount)
16435 mp->next->offset = mp->offset + mp->fix_size;
16436 else
16437 mp->next->offset = mp->offset;
16439 mp = mp->next;
16442 return min_mp;
16445 static void
16446 assign_minipool_offsets (Mfix *barrier)
16448 HOST_WIDE_INT offset = 0;
16449 Mnode *mp;
16451 minipool_barrier = barrier;
16453 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16455 mp->offset = offset;
16457 if (mp->refcount > 0)
16458 offset += mp->fix_size;
16462 /* Output the literal table */
16463 static void
16464 dump_minipool (rtx scan)
16466 Mnode * mp;
16467 Mnode * nmp;
16468 int align64 = 0;
16470 if (ARM_DOUBLEWORD_ALIGN)
16471 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16472 if (mp->refcount > 0 && mp->fix_size >= 8)
16474 align64 = 1;
16475 break;
16478 if (dump_file)
16479 fprintf (dump_file,
16480 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16481 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16483 scan = emit_label_after (gen_label_rtx (), scan);
16484 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16485 scan = emit_label_after (minipool_vector_label, scan);
16487 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16489 if (mp->refcount > 0)
16491 if (dump_file)
16493 fprintf (dump_file,
16494 ";; Offset %u, min %ld, max %ld ",
16495 (unsigned) mp->offset, (unsigned long) mp->min_address,
16496 (unsigned long) mp->max_address);
16497 arm_print_value (dump_file, mp->value);
16498 fputc ('\n', dump_file);
16501 switch (mp->fix_size)
16503 #ifdef HAVE_consttable_1
16504 case 1:
16505 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16506 break;
16508 #endif
16509 #ifdef HAVE_consttable_2
16510 case 2:
16511 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16512 break;
16514 #endif
16515 #ifdef HAVE_consttable_4
16516 case 4:
16517 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16518 break;
16520 #endif
16521 #ifdef HAVE_consttable_8
16522 case 8:
16523 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16524 break;
16526 #endif
16527 #ifdef HAVE_consttable_16
16528 case 16:
16529 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16530 break;
16532 #endif
16533 default:
16534 gcc_unreachable ();
16538 nmp = mp->next;
16539 free (mp);
16542 minipool_vector_head = minipool_vector_tail = NULL;
16543 scan = emit_insn_after (gen_consttable_end (), scan);
16544 scan = emit_barrier_after (scan);
16547 /* Return the cost of forcibly inserting a barrier after INSN. */
16548 static int
16549 arm_barrier_cost (rtx insn)
16551 /* Basing the location of the pool on the loop depth is preferable,
16552 but at the moment, the basic block information seems to be
16553 corrupt by this stage of the compilation. */
16554 int base_cost = 50;
16555 rtx next = next_nonnote_insn (insn);
16557 if (next != NULL && LABEL_P (next))
16558 base_cost -= 20;
16560 switch (GET_CODE (insn))
16562 case CODE_LABEL:
16563 /* It will always be better to place the table before the label, rather
16564 than after it. */
16565 return 50;
16567 case INSN:
16568 case CALL_INSN:
16569 return base_cost;
16571 case JUMP_INSN:
16572 return base_cost - 10;
16574 default:
16575 return base_cost + 10;
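/* So, for instance, a JUMP_INSN that is immediately followed by a
   CODE_LABEL costs 50 - 20 - 10 = 20, making it one of the cheapest
   places at which to force a pool barrier.  */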
16579 /* Find the best place in the insn stream in the range
16580 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16581 Create the barrier by inserting a jump and add a new fix entry for
16582 it. */
16583 static Mfix *
16584 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16586 HOST_WIDE_INT count = 0;
16587 rtx barrier;
16588 rtx from = fix->insn;
16589 /* The instruction after which we will insert the jump. */
16590 rtx selected = NULL;
16591 int selected_cost;
16592 /* The address at which the jump instruction will be placed. */
16593 HOST_WIDE_INT selected_address;
16594 Mfix * new_fix;
16595 HOST_WIDE_INT max_count = max_address - fix->address;
16596 rtx label = gen_label_rtx ();
16598 selected_cost = arm_barrier_cost (from);
16599 selected_address = fix->address;
16601 while (from && count < max_count)
16603 rtx tmp;
16604 int new_cost;
16606 /* This code shouldn't have been called if there was a natural barrier
16607 within range. */
16608 gcc_assert (!BARRIER_P (from));
16610 /* Count the length of this insn. This must stay in sync with the
16611 code that pushes minipool fixes. */
16612 if (LABEL_P (from))
16613 count += get_label_padding (from);
16614 else
16615 count += get_attr_length (from);
16617 /* If there is a jump table, add its length. */
16618 if (tablejump_p (from, NULL, &tmp))
16620 count += get_jump_table_size (tmp);
16622 /* Jump tables aren't in a basic block, so base the cost on
16623 the dispatch insn. If we select this location, we will
16624 still put the pool after the table. */
16625 new_cost = arm_barrier_cost (from);
16627 if (count < max_count
16628 && (!selected || new_cost <= selected_cost))
16630 selected = tmp;
16631 selected_cost = new_cost;
16632 selected_address = fix->address + count;
16635 /* Continue after the dispatch table. */
16636 from = NEXT_INSN (tmp);
16637 continue;
16640 new_cost = arm_barrier_cost (from);
16642 if (count < max_count
16643 && (!selected || new_cost <= selected_cost))
16645 selected = from;
16646 selected_cost = new_cost;
16647 selected_address = fix->address + count;
16650 from = NEXT_INSN (from);
16653 /* Make sure that we found a place to insert the jump. */
16654 gcc_assert (selected);
16656 /* Make sure we do not split a call and its corresponding
16657 CALL_ARG_LOCATION note. */
16658 if (CALL_P (selected))
16660 rtx next = NEXT_INSN (selected);
16661 if (next && NOTE_P (next)
16662 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16663 selected = next;
16666 /* Create a new JUMP_INSN that branches around a barrier. */
16667 from = emit_jump_insn_after (gen_jump (label), selected);
16668 JUMP_LABEL (from) = label;
16669 barrier = emit_barrier_after (from);
16670 emit_label_after (label, barrier);
16672 /* Create a minipool barrier entry for the new barrier. */
16673 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16674 new_fix->insn = barrier;
16675 new_fix->address = selected_address;
16676 new_fix->next = fix->next;
16677 fix->next = new_fix;
16679 return new_fix;
16682 /* Record that there is a natural barrier in the insn stream at
16683 ADDRESS. */
16684 static void
16685 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16687 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16689 fix->insn = insn;
16690 fix->address = address;
16692 fix->next = NULL;
16693 if (minipool_fix_head != NULL)
16694 minipool_fix_tail->next = fix;
16695 else
16696 minipool_fix_head = fix;
16698 minipool_fix_tail = fix;
16701 /* Record INSN, which will need fixing up to load a value from the
16702 minipool. ADDRESS is the offset of the insn since the start of the
16703 function; LOC is a pointer to the part of the insn which requires
16704 fixing; VALUE is the constant that must be loaded, which is of type
16705 MODE. */
16706 static void
16707 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16708 enum machine_mode mode, rtx value)
16710 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16712 fix->insn = insn;
16713 fix->address = address;
16714 fix->loc = loc;
16715 fix->mode = mode;
16716 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16717 fix->value = value;
16718 fix->forwards = get_attr_pool_range (insn);
16719 fix->backwards = get_attr_neg_pool_range (insn);
16720 fix->minipool = NULL;
16722 /* If an insn doesn't have a range defined for it, then it isn't
16723 expecting to be reworked by this code. Better to stop now than
16724 to generate duff assembly code. */
16725 gcc_assert (fix->forwards || fix->backwards);
16727 /* If an entry requires 8-byte alignment then assume all constant pools
16728 require 4 bytes of padding. Trying to do this later on a per-pool
16729 basis is awkward because existing pool entries have to be modified. */
16730 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16731 minipool_pad = 4;
16733 if (dump_file)
16735 fprintf (dump_file,
16736 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16737 GET_MODE_NAME (mode),
16738 INSN_UID (insn), (unsigned long) address,
16739 -1 * (long)fix->backwards, (long)fix->forwards);
16740 arm_print_value (dump_file, fix->value);
16741 fprintf (dump_file, "\n");
16744 /* Add it to the chain of fixes. */
16745 fix->next = NULL;
16747 if (minipool_fix_head != NULL)
16748 minipool_fix_tail->next = fix;
16749 else
16750 minipool_fix_head = fix;
16752 minipool_fix_tail = fix;
16755 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16756 Returns the number of insns needed, or 99 if we always want to synthesize
16757 the value. */
16758 int
16759 arm_max_const_double_inline_cost ()
16761 /* Let the value get synthesized to avoid the use of literal pools. */
16762 if (arm_disable_literal_pool)
16763 return 99;
16765 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16768 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16769 Returns the number of insns needed, or 99 if we don't know how to
16770 do it. */
16771 int
16772 arm_const_double_inline_cost (rtx val)
16774 rtx lowpart, highpart;
16775 enum machine_mode mode;
16777 mode = GET_MODE (val);
16779 if (mode == VOIDmode)
16780 mode = DImode;
16782 gcc_assert (GET_MODE_SIZE (mode) == 8);
16784 lowpart = gen_lowpart (SImode, val);
16785 highpart = gen_highpart_mode (SImode, mode, val);
16787 gcc_assert (CONST_INT_P (lowpart));
16788 gcc_assert (CONST_INT_P (highpart));
16790 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16791 NULL_RTX, NULL_RTX, 0, 0)
16792 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16793 NULL_RTX, NULL_RTX, 0, 0));
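/* Illustration: for the DImode constant 0x0000000100000001 each 32-bit
   half is 1, a valid immediate, so the cost is 1 + 1 = 2 insns; that is
   below the limit returned by arm_max_const_double_inline_cost, so such
   a constant is synthesized inline rather than placed in the pool.  */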
16796 /* Return true if it is worthwhile to split a 64-bit constant into two
16797 32-bit operations. This is the case if optimizing for size, or
16798 if we have load delay slots, or if one 32-bit part can be done with
16799 a single data operation. */
16800 bool
16801 arm_const_double_by_parts (rtx val)
16803 enum machine_mode mode = GET_MODE (val);
16804 rtx part;
16806 if (optimize_size || arm_ld_sched)
16807 return true;
16809 if (mode == VOIDmode)
16810 mode = DImode;
16812 part = gen_highpart_mode (SImode, mode, val);
16814 gcc_assert (CONST_INT_P (part));
16816 if (const_ok_for_arm (INTVAL (part))
16817 || const_ok_for_arm (~INTVAL (part)))
16818 return true;
16820 part = gen_lowpart (SImode, val);
16822 gcc_assert (CONST_INT_P (part));
16824 if (const_ok_for_arm (INTVAL (part))
16825 || const_ok_for_arm (~INTVAL (part)))
16826 return true;
16828 return false;
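/* For example, a 64-bit value whose high word is 0xff000000 is worth
   building by parts even without load delay slots, because 0xff000000
   is a rotated 8-bit immediate and that half costs a single MOV.  */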
16831 /* Return true if it is possible to inline both the high and low parts
16832 of a 64-bit constant into 32-bit data processing instructions. */
16833 bool
16834 arm_const_double_by_immediates (rtx val)
16836 enum machine_mode mode = GET_MODE (val);
16837 rtx part;
16839 if (mode == VOIDmode)
16840 mode = DImode;
16842 part = gen_highpart_mode (SImode, mode, val);
16844 gcc_assert (CONST_INT_P (part));
16846 if (!const_ok_for_arm (INTVAL (part)))
16847 return false;
16849 part = gen_lowpart (SImode, val);
16851 gcc_assert (CONST_INT_P (part));
16853 if (!const_ok_for_arm (INTVAL (part)))
16854 return false;
16856 return true;
16859 /* Scan INSN and note any of its operands that need fixing.
16860 If DO_PUSHES is false we do not actually push any of the fixups
16861 needed. */
16862 static void
16863 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16865 int opno;
16867 extract_insn (insn);
16869 if (!constrain_operands (1))
16870 fatal_insn_not_found (insn);
16872 if (recog_data.n_alternatives == 0)
16873 return;
16875 /* Fill in recog_op_alt with information about the constraints of
16876 this insn. */
16877 preprocess_constraints ();
16879 for (opno = 0; opno < recog_data.n_operands; opno++)
16881 /* Things we need to fix can only occur in inputs. */
16882 if (recog_data.operand_type[opno] != OP_IN)
16883 continue;
16885 /* If this alternative is a memory reference, then any mention
16886 of constants in this alternative is really to fool reload
16887 into allowing us to accept one there. We need to fix them up
16888 now so that we output the right code. */
16889 if (recog_op_alt[opno][which_alternative].memory_ok)
16891 rtx op = recog_data.operand[opno];
16893 if (CONSTANT_P (op))
16895 if (do_pushes)
16896 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16897 recog_data.operand_mode[opno], op);
16899 else if (MEM_P (op)
16900 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16901 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16903 if (do_pushes)
16905 rtx cop = avoid_constant_pool_reference (op);
16907 /* Casting the address of something to a mode narrower
16908 than a word can cause avoid_constant_pool_reference()
16909 to return the pool reference itself. That's no good to
16910 us here. Let's just hope that we can use the
16911 constant pool value directly. */
16912 if (op == cop)
16913 cop = get_pool_constant (XEXP (op, 0));
16915 push_minipool_fix (insn, address,
16916 recog_data.operand_loc[opno],
16917 recog_data.operand_mode[opno], cop);
16924 return;
16927 /* Rewrite move insn into subtract of 0 if the condition codes will
16928 be useful in next conditional jump insn. */
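/* A sketch of the intended transformation (illustrative Thumb-1 code):

	mov	r2, r3		@ move feeding a later compare with zero
	cmp	r2, #0
	beq	.L1

   becomes

	subs	r2, r3, #0	@ flag-setting, same value
	beq	.L1

   since the subtract already sets the condition codes the branch needs,
   the explicit compare can be omitted.  */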
16930 static void
16931 thumb1_reorg (void)
16933 basic_block bb;
16935 FOR_EACH_BB_FN (bb, cfun)
16937 rtx dest, src;
16938 rtx pat, op0, set = NULL;
16939 rtx prev, insn = BB_END (bb);
16940 bool insn_clobbered = false;
16942 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16943 insn = PREV_INSN (insn);
16945 /* Find the last cbranchsi4_insn in basic block BB. */
16946 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16947 continue;
16949 /* Get the register with which we are comparing. */
16950 pat = PATTERN (insn);
16951 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16953 /* Find the first flag setting insn before INSN in basic block BB. */
16954 gcc_assert (insn != BB_HEAD (bb));
16955 for (prev = PREV_INSN (insn);
16956 (!insn_clobbered
16957 && prev != BB_HEAD (bb)
16958 && (NOTE_P (prev)
16959 || DEBUG_INSN_P (prev)
16960 || ((set = single_set (prev)) != NULL
16961 && get_attr_conds (prev) == CONDS_NOCOND)));
16962 prev = PREV_INSN (prev))
16964 if (reg_set_p (op0, prev))
16965 insn_clobbered = true;
16968 /* Skip if op0 is clobbered by insn other than prev. */
16969 if (insn_clobbered)
16970 continue;
16972 if (!set)
16973 continue;
16975 dest = SET_DEST (set);
16976 src = SET_SRC (set);
16977 if (!low_register_operand (dest, SImode)
16978 || !low_register_operand (src, SImode))
16979 continue;
16981 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16982 in INSN. Both src and dest of the move insn are checked. */
16983 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16985 dest = copy_rtx (dest);
16986 src = copy_rtx (src);
16987 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16988 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16989 INSN_CODE (prev) = -1;
16990 /* Set test register in INSN to dest. */
16991 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16992 INSN_CODE (insn) = -1;
16997 /* Convert instructions to their cc-clobbering variant if possible, since
16998 that allows us to use smaller encodings. */
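/* A sketch of the effect (illustrative): when the condition codes are
   dead after the insn,

	add	r0, r1, r2	@ 32-bit add.w, flags untouched

   can be re-emitted as

	adds	r0, r1, r2	@ 16-bit encoding, clobbers the flags

   which is what the CONV/SWAP_CONV actions below arrange by attaching a
   clobber of CC_REGNUM to the pattern.  */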
17000 static void
17001 thumb2_reorg (void)
17003 basic_block bb;
17004 regset_head live;
17006 INIT_REG_SET (&live);
17008 /* We are freeing block_for_insn in the toplev to keep compatibility
17009 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17010 compute_bb_for_insn ();
17011 df_analyze ();
17013 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17015 FOR_EACH_BB_FN (bb, cfun)
17017 if (current_tune->disparage_flag_setting_t16_encodings
17018 && optimize_bb_for_speed_p (bb))
17019 continue;
17021 rtx insn;
17022 Convert_Action action = SKIP;
17023 Convert_Action action_for_partial_flag_setting
17024 = (current_tune->disparage_partial_flag_setting_t16_encodings
17025 && optimize_bb_for_speed_p (bb))
17026 ? SKIP : CONV;
17028 COPY_REG_SET (&live, DF_LR_OUT (bb));
17029 df_simulate_initialize_backwards (bb, &live);
17030 FOR_BB_INSNS_REVERSE (bb, insn)
17032 if (NONJUMP_INSN_P (insn)
17033 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17034 && GET_CODE (PATTERN (insn)) == SET)
17036 action = SKIP;
17037 rtx pat = PATTERN (insn);
17038 rtx dst = XEXP (pat, 0);
17039 rtx src = XEXP (pat, 1);
17040 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17042 if (!OBJECT_P (src))
17043 op0 = XEXP (src, 0);
17045 if (BINARY_P (src))
17046 op1 = XEXP (src, 1);
17048 if (low_register_operand (dst, SImode))
17050 switch (GET_CODE (src))
17052 case PLUS:
17053 /* Adding two registers and storing the result
17054 in the first source is already a 16-bit
17055 operation. */
17056 if (rtx_equal_p (dst, op0)
17057 && register_operand (op1, SImode))
17058 break;
17060 if (low_register_operand (op0, SImode))
17062 /* ADDS <Rd>,<Rn>,<Rm> */
17063 if (low_register_operand (op1, SImode))
17064 action = CONV;
17065 /* ADDS <Rdn>,#<imm8> */
17066 /* SUBS <Rdn>,#<imm8> */
17067 else if (rtx_equal_p (dst, op0)
17068 && CONST_INT_P (op1)
17069 && IN_RANGE (INTVAL (op1), -255, 255))
17070 action = CONV;
17071 /* ADDS <Rd>,<Rn>,#<imm3> */
17072 /* SUBS <Rd>,<Rn>,#<imm3> */
17073 else if (CONST_INT_P (op1)
17074 && IN_RANGE (INTVAL (op1), -7, 7))
17075 action = CONV;
17077 /* ADCS <Rd>, <Rn> */
17078 else if (GET_CODE (XEXP (src, 0)) == PLUS
17079 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17080 && low_register_operand (XEXP (XEXP (src, 0), 1),
17081 SImode)
17082 && COMPARISON_P (op1)
17083 && cc_register (XEXP (op1, 0), VOIDmode)
17084 && maybe_get_arm_condition_code (op1) == ARM_CS
17085 && XEXP (op1, 1) == const0_rtx)
17086 action = CONV;
17087 break;
17089 case MINUS:
17090 /* RSBS <Rd>,<Rn>,#0
17091 Not handled here: see NEG below. */
17092 /* SUBS <Rd>,<Rn>,#<imm3>
17093 SUBS <Rdn>,#<imm8>
17094 Not handled here: see PLUS above. */
17095 /* SUBS <Rd>,<Rn>,<Rm> */
17096 if (low_register_operand (op0, SImode)
17097 && low_register_operand (op1, SImode))
17098 action = CONV;
17099 break;
17101 case MULT:
17102 /* MULS <Rdm>,<Rn>,<Rdm>
17103 As an exception to the rule, this is only used
17104 when optimizing for size since MULS is slow on all
17105 known implementations. We do not even want to use
17106 MULS in cold code, if optimizing for speed, so we
17107 test the global flag here. */
17108 if (!optimize_size)
17109 break;
17110 /* else fall through. */
17111 case AND:
17112 case IOR:
17113 case XOR:
17114 /* ANDS <Rdn>,<Rm> */
17115 if (rtx_equal_p (dst, op0)
17116 && low_register_operand (op1, SImode))
17117 action = action_for_partial_flag_setting;
17118 else if (rtx_equal_p (dst, op1)
17119 && low_register_operand (op0, SImode))
17120 action = action_for_partial_flag_setting == SKIP
17121 ? SKIP : SWAP_CONV;
17122 break;
17124 case ASHIFTRT:
17125 case ASHIFT:
17126 case LSHIFTRT:
17127 /* ASRS <Rdn>,<Rm> */
17128 /* LSRS <Rdn>,<Rm> */
17129 /* LSLS <Rdn>,<Rm> */
17130 if (rtx_equal_p (dst, op0)
17131 && low_register_operand (op1, SImode))
17132 action = action_for_partial_flag_setting;
17133 /* ASRS <Rd>,<Rm>,#<imm5> */
17134 /* LSRS <Rd>,<Rm>,#<imm5> */
17135 /* LSLS <Rd>,<Rm>,#<imm5> */
17136 else if (low_register_operand (op0, SImode)
17137 && CONST_INT_P (op1)
17138 && IN_RANGE (INTVAL (op1), 0, 31))
17139 action = action_for_partial_flag_setting;
17140 break;
17142 case ROTATERT:
17143 /* RORS <Rdn>,<Rm> */
17144 if (rtx_equal_p (dst, op0)
17145 && low_register_operand (op1, SImode))
17146 action = action_for_partial_flag_setting;
17147 break;
17149 case NOT:
17150 /* MVNS <Rd>,<Rm> */
17151 if (low_register_operand (op0, SImode))
17152 action = action_for_partial_flag_setting;
17153 break;
17155 case NEG:
17156 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17157 if (low_register_operand (op0, SImode))
17158 action = CONV;
17159 break;
17161 case CONST_INT:
17162 /* MOVS <Rd>,#<imm8> */
17163 if (CONST_INT_P (src)
17164 && IN_RANGE (INTVAL (src), 0, 255))
17165 action = action_for_partial_flag_setting;
17166 break;
17168 case REG:
17169 /* MOVS and MOV<c> with registers have different
17170 encodings, so are not relevant here. */
17171 break;
17173 default:
17174 break;
17178 if (action != SKIP)
17180 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17181 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17182 rtvec vec;
17184 if (action == SWAP_CONV)
17186 src = copy_rtx (src);
17187 XEXP (src, 0) = op1;
17188 XEXP (src, 1) = op0;
17189 pat = gen_rtx_SET (VOIDmode, dst, src);
17190 vec = gen_rtvec (2, pat, clobber);
17192 else /* action == CONV */
17193 vec = gen_rtvec (2, pat, clobber);
17195 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17196 INSN_CODE (insn) = -1;
17200 if (NONDEBUG_INSN_P (insn))
17201 df_simulate_one_insn_backwards (bb, insn, &live);
17205 CLEAR_REG_SET (&live);
17208 /* Gcc puts the pool in the wrong place for ARM, since we can only
17209 load addresses a limited distance around the pc. We do some
17210 special munging to move the constant pool values to the correct
17211 point in the code. */
17212 static void
17213 arm_reorg (void)
17215 rtx insn;
17216 HOST_WIDE_INT address = 0;
17217 Mfix * fix;
17219 if (TARGET_THUMB1)
17220 thumb1_reorg ();
17221 else if (TARGET_THUMB2)
17222 thumb2_reorg ();
17224 /* Ensure all insns that must be split have been split at this point.
17225 Otherwise, the pool placement code below may compute incorrect
17226 insn lengths. Note that when optimizing, all insns have already
17227 been split at this point. */
17228 if (!optimize)
17229 split_all_insns_noflow ();
17231 minipool_fix_head = minipool_fix_tail = NULL;
17233 /* The first insn must always be a note, or the code below won't
17234 scan it properly. */
17235 insn = get_insns ();
17236 gcc_assert (NOTE_P (insn));
17237 minipool_pad = 0;
17239 /* Scan all the insns and record the operands that will need fixing. */
17240 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17242 if (BARRIER_P (insn))
17243 push_minipool_barrier (insn, address);
17244 else if (INSN_P (insn))
17246 rtx table;
17248 note_invalid_constants (insn, address, true);
17249 address += get_attr_length (insn);
17251 /* If the insn is a vector jump, add the size of the table
17252 and skip the table. */
17253 if (tablejump_p (insn, NULL, &table))
17255 address += get_jump_table_size (table);
17256 insn = table;
17259 else if (LABEL_P (insn))
17260 /* Add the worst-case padding due to alignment. We don't add
17261 the _current_ padding because the minipool insertions
17262 themselves might change it. */
17263 address += get_label_padding (insn);
17266 fix = minipool_fix_head;
17268 /* Now scan the fixups and perform the required changes. */
17269 while (fix)
17271 Mfix * ftmp;
17272 Mfix * fdel;
17273 Mfix * last_added_fix;
17274 Mfix * last_barrier = NULL;
17275 Mfix * this_fix;
17277 /* Skip any further barriers before the next fix. */
17278 while (fix && BARRIER_P (fix->insn))
17279 fix = fix->next;
17281 /* No more fixes. */
17282 if (fix == NULL)
17283 break;
17285 last_added_fix = NULL;
17287 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17289 if (BARRIER_P (ftmp->insn))
17291 if (ftmp->address >= minipool_vector_head->max_address)
17292 break;
17294 last_barrier = ftmp;
17296 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17297 break;
17299 last_added_fix = ftmp; /* Keep track of the last fix added. */
17302 /* If we found a barrier, drop back to that; any fixes that we
17303 could have reached but come after the barrier will now go in
17304 the next mini-pool. */
17305 if (last_barrier != NULL)
17307 /* Reduce the refcount for those fixes that won't go into this
17308 pool after all. */
17309 for (fdel = last_barrier->next;
17310 fdel && fdel != ftmp;
17311 fdel = fdel->next)
17313 fdel->minipool->refcount--;
17314 fdel->minipool = NULL;
17317 ftmp = last_barrier;
17319 else
17321 /* ftmp is the first fix that we can't fit into this pool and
17322 there are no natural barriers that we could use. Insert a
17323 new barrier in the code somewhere between the previous
17324 fix and this one, and arrange to jump around it. */
17325 HOST_WIDE_INT max_address;
17327 /* The last item on the list of fixes must be a barrier, so
17328 we can never run off the end of the list of fixes without
17329 last_barrier being set. */
17330 gcc_assert (ftmp);
17332 max_address = minipool_vector_head->max_address;
17333 /* Check that there isn't another fix that is in range that
17334 we couldn't fit into this pool because the pool was
17335 already too large: we need to put the pool before such an
17336 instruction. The pool itself may come just after the
17337 fix because create_fix_barrier also allows space for a
17338 jump instruction. */
17339 if (ftmp->address < max_address)
17340 max_address = ftmp->address + 1;
17342 last_barrier = create_fix_barrier (last_added_fix, max_address);
17345 assign_minipool_offsets (last_barrier);
17347 while (ftmp)
17349 if (!BARRIER_P (ftmp->insn)
17350 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17351 == NULL))
17352 break;
17354 ftmp = ftmp->next;
17357 /* Scan over the fixes we have identified for this pool, fixing them
17358 up and adding the constants to the pool itself. */
17359 for (this_fix = fix; this_fix && ftmp != this_fix;
17360 this_fix = this_fix->next)
17361 if (!BARRIER_P (this_fix->insn))
17363 rtx addr
17364 = plus_constant (Pmode,
17365 gen_rtx_LABEL_REF (VOIDmode,
17366 minipool_vector_label),
17367 this_fix->minipool->offset);
17368 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17371 dump_minipool (last_barrier->insn);
17372 fix = ftmp;
17375 /* From now on we must synthesize any constants that we can't handle
17376 directly. This can happen if the RTL gets split during final
17377 instruction generation. */
17378 cfun->machine->after_arm_reorg = 1;
17380 /* Free the minipool memory. */
17381 obstack_free (&minipool_obstack, minipool_startobj);
17384 /* Routines to output assembly language. */
17386 /* If the rtx is the correct value then return the string of the number.
17387 In this way we can ensure that valid double constants are generated even
17388 when cross compiling. */
17389 const char *
17390 fp_immediate_constant (rtx x)
17392 REAL_VALUE_TYPE r;
17394 if (!fp_consts_inited)
17395 init_fp_table ();
17397 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17399 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17400 return "0";
17403 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17404 static const char *
17405 fp_const_from_val (REAL_VALUE_TYPE *r)
17407 if (!fp_consts_inited)
17408 init_fp_table ();
17410 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17411 return "0";
17414 /* OPERANDS[0] is the entire list of insns that constitute pop,
17415 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17416 is in the list, UPDATE is true iff the list contains explicit
17417 update of base register. */
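/* For example (illustrative): popping { r4, r5, pc } with SP as the base
   register and an explicit update produces "pop {r4, r5, pc}" under
   unified asm, while the same list with r7 as the base produces
   "ldmia r7!, {r4, r5, pc}".  */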
17418 void
17419 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17420 bool update)
17422 int i;
17423 char pattern[100];
17424 int offset;
17425 const char *conditional;
17426 int num_saves = XVECLEN (operands[0], 0);
17427 unsigned int regno;
17428 unsigned int regno_base = REGNO (operands[1]);
17430 offset = 0;
17431 offset += update ? 1 : 0;
17432 offset += return_pc ? 1 : 0;
17434 /* Is the base register in the list? */
17435 for (i = offset; i < num_saves; i++)
17437 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17438 /* If SP is in the list, then the base register must be SP. */
17439 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17440 /* If base register is in the list, there must be no explicit update. */
17441 if (regno == regno_base)
17442 gcc_assert (!update);
17445 conditional = reverse ? "%?%D0" : "%?%d0";
17446 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17448 /* Output pop (not ldmfd) because it has a shorter encoding. */
17449 gcc_assert (update);
17450 sprintf (pattern, "pop%s\t{", conditional);
17452 else
17454 /* Output ldmfd when the base register is SP; otherwise output ldmia.
17455 It's just a convention; their semantics are identical. */
17456 if (regno_base == SP_REGNUM)
17457 sprintf (pattern, "ldm%sfd\t", conditional);
17458 else if (TARGET_UNIFIED_ASM)
17459 sprintf (pattern, "ldmia%s\t", conditional);
17460 else
17461 sprintf (pattern, "ldm%sia\t", conditional);
17463 strcat (pattern, reg_names[regno_base]);
17464 if (update)
17465 strcat (pattern, "!, {");
17466 else
17467 strcat (pattern, ", {");
17470 /* Output the first destination register. */
17471 strcat (pattern,
17472 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17474 /* Output the rest of the destination registers. */
17475 for (i = offset + 1; i < num_saves; i++)
17477 strcat (pattern, ", ");
17478 strcat (pattern,
17479 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17482 strcat (pattern, "}");
17484 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17485 strcat (pattern, "^");
17487 output_asm_insn (pattern, &cond);
17491 /* Output the assembly for a store multiple. */
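/* For example (illustrative, assuming the address operand is based on sp):
   a push of three D registers starting at d8 is printed as
   "fstmfdd sp!, {d8, d9, d10}", built from the template below plus one
   ", d%d" per additional register.  */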
17493 const char *
17494 vfp_output_fstmd (rtx * operands)
17496 char pattern[100];
17497 int p;
17498 int base;
17499 int i;
17501 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17502 p = strlen (pattern);
17504 gcc_assert (REG_P (operands[1]));
17506 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17507 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17509 p += sprintf (&pattern[p], ", d%d", base + i);
17511 strcpy (&pattern[p], "}");
17513 output_asm_insn (pattern, operands);
17514 return "";
17518 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17519 number of bytes pushed. */
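/* A sketch of the effect: a call with COUNT == 4 emits one store-multiple
   that pushes four D registers, attaches the corresponding DWARF frame
   notes, and returns 32 (4 * 8 bytes).  Pre-v6 cores bump COUNT == 2 up
   to 3 to dodge the erratum described below.  */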
17521 static int
17522 vfp_emit_fstmd (int base_reg, int count)
17524 rtx par;
17525 rtx dwarf;
17526 rtx tmp, reg;
17527 int i;
17529 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17530 register pairs are stored by a store multiple insn. We avoid this
17531 by pushing an extra pair. */
17532 if (count == 2 && !arm_arch6)
17534 if (base_reg == LAST_VFP_REGNUM - 3)
17535 base_reg -= 2;
17536 count++;
17539 /* FSTMD may not store more than 16 doubleword registers at once. Split
17540 larger stores into multiple parts (up to a maximum of two, in
17541 practice). */
17542 if (count > 16)
17544 int saved;
17545 /* NOTE: base_reg is an internal register number, so each D register
17546 counts as 2. */
17547 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17548 saved += vfp_emit_fstmd (base_reg, 16);
17549 return saved;
17552 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17553 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17555 reg = gen_rtx_REG (DFmode, base_reg);
17556 base_reg += 2;
17558 XVECEXP (par, 0, 0)
17559 = gen_rtx_SET (VOIDmode,
17560 gen_frame_mem
17561 (BLKmode,
17562 gen_rtx_PRE_MODIFY (Pmode,
17563 stack_pointer_rtx,
17564 plus_constant
17565 (Pmode, stack_pointer_rtx,
17566 - (count * 8)))
17568 gen_rtx_UNSPEC (BLKmode,
17569 gen_rtvec (1, reg),
17570 UNSPEC_PUSH_MULT));
17572 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17573 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17574 RTX_FRAME_RELATED_P (tmp) = 1;
17575 XVECEXP (dwarf, 0, 0) = tmp;
17577 tmp = gen_rtx_SET (VOIDmode,
17578 gen_frame_mem (DFmode, stack_pointer_rtx),
17579 reg);
17580 RTX_FRAME_RELATED_P (tmp) = 1;
17581 XVECEXP (dwarf, 0, 1) = tmp;
17583 for (i = 1; i < count; i++)
17585 reg = gen_rtx_REG (DFmode, base_reg);
17586 base_reg += 2;
17587 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17589 tmp = gen_rtx_SET (VOIDmode,
17590 gen_frame_mem (DFmode,
17591 plus_constant (Pmode,
17592 stack_pointer_rtx,
17593 i * 8)),
17594 reg);
17595 RTX_FRAME_RELATED_P (tmp) = 1;
17596 XVECEXP (dwarf, 0, i + 1) = tmp;
17599 par = emit_insn (par);
17600 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17601 RTX_FRAME_RELATED_P (par) = 1;
17603 return count * 8;
17606 /* Emit a call instruction with pattern PAT. ADDR is the address of
17607 the call target. */
17609 void
17610 arm_emit_call_insn (rtx pat, rtx addr)
17612 rtx insn;
17614 insn = emit_call_insn (pat);
17616 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17617 If the call might use such an entry, add a use of the PIC register
17618 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17619 if (TARGET_VXWORKS_RTP
17620 && flag_pic
17621 && GET_CODE (addr) == SYMBOL_REF
17622 && (SYMBOL_REF_DECL (addr)
17623 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17624 : !SYMBOL_REF_LOCAL_P (addr)))
17626 require_pic_register ();
17627 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17631 /* Output a 'call' insn. */
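/* Illustrative output for a pre-v5 call through r3 on an arch4t or
   interworking target:
	mov	lr, pc
	bx	r3
   Without bx available, the final instruction becomes "mov pc, r3".  */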
17632 const char *
17633 output_call (rtx *operands)
17635 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17637 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17638 if (REGNO (operands[0]) == LR_REGNUM)
17640 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17641 output_asm_insn ("mov%?\t%0, %|lr", operands);
17644 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17646 if (TARGET_INTERWORK || arm_arch4t)
17647 output_asm_insn ("bx%?\t%0", operands);
17648 else
17649 output_asm_insn ("mov%?\t%|pc, %0", operands);
17651 return "";
17654 /* Output a 'call' insn that is a reference in memory. This is
17655 disabled for ARMv5 and we prefer a blx instead because otherwise
17656 there's a significant performance overhead. */
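/* A rough sketch of the emitted sequences (operands illustrative): with
   interworking, "ldr ip, <mem>; mov lr, pc; bx ip"; otherwise, when LR is
   not part of the address, simply "mov lr, pc; ldr pc, <mem>".  */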
17657 const char *
17658 output_call_mem (rtx *operands)
17660 gcc_assert (!arm_arch5);
17661 if (TARGET_INTERWORK)
17663 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17664 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17665 output_asm_insn ("bx%?\t%|ip", operands);
17667 else if (regno_use_in (LR_REGNUM, operands[0]))
17669 /* LR is used in the memory address. We load the address in the
17670 first instruction. It's safe to use IP as the target of the
17671 load since the call will kill it anyway. */
17672 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17673 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17674 if (arm_arch4t)
17675 output_asm_insn ("bx%?\t%|ip", operands);
17676 else
17677 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17679 else
17681 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17682 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17685 return "";
17689 /* Output a move from arm registers to arm registers of a long double
17690 OPERANDS[0] is the destination.
17691 OPERANDS[1] is the source. */
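/* Worked example (hypothetical registers): copying r1-r3 into r0-r2 emits
   "mov r0, r1; mov r1, r2; mov r2, r3" in ascending order, so an
   overlapping source register is always read before it is overwritten;
   the opposite overlap direction uses descending order instead.  */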
17692 const char *
17693 output_mov_long_double_arm_from_arm (rtx *operands)
17695 /* We have to be careful here because the two might overlap. */
17696 int dest_start = REGNO (operands[0]);
17697 int src_start = REGNO (operands[1]);
17698 rtx ops[2];
17699 int i;
17701 if (dest_start < src_start)
17703 for (i = 0; i < 3; i++)
17705 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17706 ops[1] = gen_rtx_REG (SImode, src_start + i);
17707 output_asm_insn ("mov%?\t%0, %1", ops);
17710 else
17712 for (i = 2; i >= 0; i--)
17714 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17715 ops[1] = gen_rtx_REG (SImode, src_start + i);
17716 output_asm_insn ("mov%?\t%0, %1", ops);
17720 return "";
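/* Emit a pair of instructions that load SRC into DEST 16 bits at a time
   (typically assembling to a movw/movt pair).  For a constant, the low
   half is written first and the high half is inserted only if it is
   non-zero; for a symbolic SRC a HIGH/LO_SUM pair is emitted instead.  */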
17723 void
17724 arm_emit_movpair (rtx dest, rtx src)
17726 /* If the src is an immediate, simplify it. */
17727 if (CONST_INT_P (src))
17729 HOST_WIDE_INT val = INTVAL (src);
17730 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17731 if ((val >> 16) & 0x0000ffff)
17732 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17733 GEN_INT (16)),
17734 GEN_INT ((val >> 16) & 0x0000ffff));
17735 return;
17737 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17738 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17741 /* Output a move between double words. It must be REG<-MEM
17742 or MEM<-REG. */
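/* For example (illustrative operands): a DImode load from a plain register
   address becomes "ldrd r0, [r4]" when LDRD is available, or
   "ldmia r4, {r0, r1}" otherwise; the store direction mirrors this with
   strd/stmia.  */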
17743 const char *
17744 output_move_double (rtx *operands, bool emit, int *count)
17746 enum rtx_code code0 = GET_CODE (operands[0]);
17747 enum rtx_code code1 = GET_CODE (operands[1]);
17748 rtx otherops[3];
17749 if (count)
17750 *count = 1;
17752 /* The only case when this might happen is when
17753 you are looking at the length of a DImode instruction
17754 that has an invalid constant in it. */
17755 if (code0 == REG && code1 != MEM)
17757 gcc_assert (!emit);
17758 *count = 2;
17759 return "";
17762 if (code0 == REG)
17764 unsigned int reg0 = REGNO (operands[0]);
17766 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17768 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17770 switch (GET_CODE (XEXP (operands[1], 0)))
17772 case REG:
17774 if (emit)
17776 if (TARGET_LDRD
17777 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17778 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17779 else
17780 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17782 break;
17784 case PRE_INC:
17785 gcc_assert (TARGET_LDRD);
17786 if (emit)
17787 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17788 break;
17790 case PRE_DEC:
17791 if (emit)
17793 if (TARGET_LDRD)
17794 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17795 else
17796 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17798 break;
17800 case POST_INC:
17801 if (emit)
17803 if (TARGET_LDRD)
17804 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17805 else
17806 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17808 break;
17810 case POST_DEC:
17811 gcc_assert (TARGET_LDRD);
17812 if (emit)
17813 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17814 break;
17816 case PRE_MODIFY:
17817 case POST_MODIFY:
17818 /* Autoincrement addressing modes should never have overlapping
17819 base and destination registers, and overlapping index registers
17820 are already prohibited, so this doesn't need to worry about
17821 fix_cm3_ldrd. */
17822 otherops[0] = operands[0];
17823 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17824 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17826 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17828 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17830 /* Registers overlap so split out the increment. */
17831 if (emit)
17833 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17834 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17836 if (count)
17837 *count = 2;
17839 else
17841 /* Use a single insn if we can.
17842 FIXME: IWMMXT allows offsets larger than ldrd can
17843 handle, fix these up with a pair of ldr. */
17844 if (TARGET_THUMB2
17845 || !CONST_INT_P (otherops[2])
17846 || (INTVAL (otherops[2]) > -256
17847 && INTVAL (otherops[2]) < 256))
17849 if (emit)
17850 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17852 else
17854 if (emit)
17856 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17857 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17859 if (count)
17860 *count = 2;
17865 else
17867 /* Use a single insn if we can.
17868 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17869 fix these up with a pair of ldr. */
17870 if (TARGET_THUMB2
17871 || !CONST_INT_P (otherops[2])
17872 || (INTVAL (otherops[2]) > -256
17873 && INTVAL (otherops[2]) < 256))
17875 if (emit)
17876 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17878 else
17880 if (emit)
17882 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17883 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17885 if (count)
17886 *count = 2;
17889 break;
17891 case LABEL_REF:
17892 case CONST:
17893 /* We might be able to use ldrd %0, %1 here. However, the range is
17894 different to ldr/adr, and it is broken on some ARMv7-M
17895 implementations. */
17896 /* Use the second register of the pair to avoid problematic
17897 overlap. */
17898 otherops[1] = operands[1];
17899 if (emit)
17900 output_asm_insn ("adr%?\t%0, %1", otherops);
17901 operands[1] = otherops[0];
17902 if (emit)
17904 if (TARGET_LDRD)
17905 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17906 else
17907 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17910 if (count)
17911 *count = 2;
17912 break;
17914 /* ??? This needs checking for thumb2. */
17915 default:
17916 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17917 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17919 otherops[0] = operands[0];
17920 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17921 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17923 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17925 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17927 switch ((int) INTVAL (otherops[2]))
17929 case -8:
17930 if (emit)
17931 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17932 return "";
17933 case -4:
17934 if (TARGET_THUMB2)
17935 break;
17936 if (emit)
17937 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17938 return "";
17939 case 4:
17940 if (TARGET_THUMB2)
17941 break;
17942 if (emit)
17943 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17944 return "";
17947 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17948 operands[1] = otherops[0];
17949 if (TARGET_LDRD
17950 && (REG_P (otherops[2])
17951 || TARGET_THUMB2
17952 || (CONST_INT_P (otherops[2])
17953 && INTVAL (otherops[2]) > -256
17954 && INTVAL (otherops[2]) < 256)))
17956 if (reg_overlap_mentioned_p (operands[0],
17957 otherops[2]))
17959 rtx tmp;
17960 /* Swap base and index registers over to
17961 avoid a conflict. */
17962 tmp = otherops[1];
17963 otherops[1] = otherops[2];
17964 otherops[2] = tmp;
17966 /* If both registers conflict, it will usually
17967 have been fixed by a splitter. */
17968 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17969 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17971 if (emit)
17973 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17974 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17976 if (count)
17977 *count = 2;
17979 else
17981 otherops[0] = operands[0];
17982 if (emit)
17983 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17985 return "";
17988 if (CONST_INT_P (otherops[2]))
17990 if (emit)
17992 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17993 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17994 else
17995 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17998 else
18000 if (emit)
18001 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18004 else
18006 if (emit)
18007 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18010 if (count)
18011 *count = 2;
18013 if (TARGET_LDRD)
18014 return "ldr%(d%)\t%0, [%1]";
18016 return "ldm%(ia%)\t%1, %M0";
18018 else
18020 otherops[1] = adjust_address (operands[1], SImode, 4);
18021 /* Take care of overlapping base/data reg. */
18022 if (reg_mentioned_p (operands[0], operands[1]))
18024 if (emit)
18026 output_asm_insn ("ldr%?\t%0, %1", otherops);
18027 output_asm_insn ("ldr%?\t%0, %1", operands);
18029 if (count)
18030 *count = 2;
18033 else
18035 if (emit)
18037 output_asm_insn ("ldr%?\t%0, %1", operands);
18038 output_asm_insn ("ldr%?\t%0, %1", otherops);
18040 if (count)
18041 *count = 2;
18046 else
18048 /* Constraints should ensure this. */
18049 gcc_assert (code0 == MEM && code1 == REG);
18050 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18051 || (TARGET_ARM && TARGET_LDRD));
18053 switch (GET_CODE (XEXP (operands[0], 0)))
18055 case REG:
18056 if (emit)
18058 if (TARGET_LDRD)
18059 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18060 else
18061 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18063 break;
18065 case PRE_INC:
18066 gcc_assert (TARGET_LDRD);
18067 if (emit)
18068 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18069 break;
18071 case PRE_DEC:
18072 if (emit)
18074 if (TARGET_LDRD)
18075 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18076 else
18077 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18079 break;
18081 case POST_INC:
18082 if (emit)
18084 if (TARGET_LDRD)
18085 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18086 else
18087 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18089 break;
18091 case POST_DEC:
18092 gcc_assert (TARGET_LDRD);
18093 if (emit)
18094 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18095 break;
18097 case PRE_MODIFY:
18098 case POST_MODIFY:
18099 otherops[0] = operands[1];
18100 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18101 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18103 /* IWMMXT allows offsets larger than ldrd can handle,
18104 fix these up with a pair of ldr. */
18105 if (!TARGET_THUMB2
18106 && CONST_INT_P (otherops[2])
18107 && (INTVAL(otherops[2]) <= -256
18108 || INTVAL(otherops[2]) >= 256))
18110 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18112 if (emit)
18114 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18115 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18117 if (count)
18118 *count = 2;
18120 else
18122 if (emit)
18124 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18125 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18127 if (count)
18128 *count = 2;
18131 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18133 if (emit)
18134 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18136 else
18138 if (emit)
18139 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18141 break;
18143 case PLUS:
18144 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18145 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18147 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18149 case -8:
18150 if (emit)
18151 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18152 return "";
18154 case -4:
18155 if (TARGET_THUMB2)
18156 break;
18157 if (emit)
18158 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18159 return "";
18161 case 4:
18162 if (TARGET_THUMB2)
18163 break;
18164 if (emit)
18165 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18166 return "";
18169 if (TARGET_LDRD
18170 && (REG_P (otherops[2])
18171 || TARGET_THUMB2
18172 || (CONST_INT_P (otherops[2])
18173 && INTVAL (otherops[2]) > -256
18174 && INTVAL (otherops[2]) < 256)))
18176 otherops[0] = operands[1];
18177 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18178 if (emit)
18179 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18180 return "";
18182 /* Fall through */
18184 default:
18185 otherops[0] = adjust_address (operands[0], SImode, 4);
18186 otherops[1] = operands[1];
18187 if (emit)
18189 output_asm_insn ("str%?\t%1, %0", operands);
18190 output_asm_insn ("str%?\t%H1, %0", otherops);
18192 if (count)
18193 *count = 2;
18197 return "";
18200 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18201 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
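/* Illustrative cases: a load from a register address becomes
   "ldmia r4, {r0, r1, r2, r3}", a store becomes "stmia r4, {r0, r1, r2, r3}",
   and a register-to-register move falls back to four "mov" instructions
   ordered so that overlapping halves are not clobbered.  */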
18203 const char *
18204 output_move_quad (rtx *operands)
18206 if (REG_P (operands[0]))
18208 /* Load, or reg->reg move. */
18210 if (MEM_P (operands[1]))
18212 switch (GET_CODE (XEXP (operands[1], 0)))
18214 case REG:
18215 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18216 break;
18218 case LABEL_REF:
18219 case CONST:
18220 output_asm_insn ("adr%?\t%0, %1", operands);
18221 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18222 break;
18224 default:
18225 gcc_unreachable ();
18228 else
18230 rtx ops[2];
18231 int dest, src, i;
18233 gcc_assert (REG_P (operands[1]));
18235 dest = REGNO (operands[0]);
18236 src = REGNO (operands[1]);
18238 /* This seems pretty dumb, but hopefully GCC won't try to do it
18239 very often. */
18240 if (dest < src)
18241 for (i = 0; i < 4; i++)
18243 ops[0] = gen_rtx_REG (SImode, dest + i);
18244 ops[1] = gen_rtx_REG (SImode, src + i);
18245 output_asm_insn ("mov%?\t%0, %1", ops);
18247 else
18248 for (i = 3; i >= 0; i--)
18250 ops[0] = gen_rtx_REG (SImode, dest + i);
18251 ops[1] = gen_rtx_REG (SImode, src + i);
18252 output_asm_insn ("mov%?\t%0, %1", ops);
18256 else
18258 gcc_assert (MEM_P (operands[0]));
18259 gcc_assert (REG_P (operands[1]));
18260 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18262 switch (GET_CODE (XEXP (operands[0], 0)))
18264 case REG:
18265 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18266 break;
18268 default:
18269 gcc_unreachable ();
18273 return "";
18276 /* Output a VFP load or store instruction. */
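/* Example outputs (illustrative): a DFmode load from [r4] prints as
   "fldd d8, [r4]" and an SFmode store as "fsts s0, [r4]"; pre-decrement
   and post-increment addresses instead use the fstmdb/fldmia style
   templates below, with the base register updated in place.  */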
18278 const char *
18279 output_move_vfp (rtx *operands)
18281 rtx reg, mem, addr, ops[2];
18282 int load = REG_P (operands[0]);
18283 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18284 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18285 const char *templ;
18286 char buff[50];
18287 enum machine_mode mode;
18289 reg = operands[!load];
18290 mem = operands[load];
18292 mode = GET_MODE (reg);
18294 gcc_assert (REG_P (reg));
18295 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18296 gcc_assert (mode == SFmode
18297 || mode == DFmode
18298 || mode == SImode
18299 || mode == DImode
18300 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18301 gcc_assert (MEM_P (mem));
18303 addr = XEXP (mem, 0);
18305 switch (GET_CODE (addr))
18307 case PRE_DEC:
18308 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18309 ops[0] = XEXP (addr, 0);
18310 ops[1] = reg;
18311 break;
18313 case POST_INC:
18314 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18315 ops[0] = XEXP (addr, 0);
18316 ops[1] = reg;
18317 break;
18319 default:
18320 templ = "f%s%c%%?\t%%%s0, %%1%s";
18321 ops[0] = reg;
18322 ops[1] = mem;
18323 break;
18326 sprintf (buff, templ,
18327 load ? "ld" : "st",
18328 dp ? 'd' : 's',
18329 dp ? "P" : "",
18330 integer_p ? "\t%@ int" : "");
18331 output_asm_insn (buff, ops);
18333 return "";
18336 /* Output a Neon double-word or quad-word load or store, or a load
18337 or store for larger structure modes.
18339 WARNING: The ordering of elements is weird in big-endian mode,
18340 because the EABI requires that vectors stored in memory appear
18341 as though they were stored by a VSTM instruction.
18342 GCC RTL defines element ordering based on in-memory order.
18343 This can be different from the architectural ordering of elements
18344 within a NEON register. The intrinsics defined in arm_neon.h use the
18345 NEON register element ordering, not the GCC RTL element ordering.
18347 For example, the in-memory ordering of a big-endian quadword
18348 vector with 16-bit elements when stored from register pair {d0,d1}
18349 will be (lowest address first, d0[N] is NEON register element N):
18351 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18353 When necessary, quadword registers (dN, dN+1) are moved to ARM
18354 registers from rN in the order:
18356 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18358 So that STM/LDM can be used on vectors in ARM registers, and the
18359 same memory layout will result as if VSTM/VLDM were used.
18361 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18362 possible, which allows use of appropriate alignment tags.
18363 Note that the choice of "64" is independent of the actual vector
18364 element size; this size simply ensures that the behavior is
18365 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18367 Due to limitations of those instructions, use of VST1.64/VLD1.64
18368 is not possible if:
18369 - the address contains PRE_DEC, or
18370 - the mode refers to more than 4 double-word registers
18372 In those cases, it would be possible to replace VSTM/VLDM by a
18373 sequence of instructions; this is not currently implemented since
18374 this is not certain to actually improve performance. */
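/* Example outputs (approximate; the exact operand printing depends on the
   address and alignment): a quad-word load from [r4] prints roughly as
   "vld1.64 {d0, d1}, [r4:64]", structure modes wider than four D registers
   fall back to "vldmia r4, {d0-d7}", and pre-decrement addresses always
   use the vstmdb/vldmdb form.  */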
18376 const char *
18377 output_move_neon (rtx *operands)
18379 rtx reg, mem, addr, ops[2];
18380 int regno, nregs, load = REG_P (operands[0]);
18381 const char *templ;
18382 char buff[50];
18383 enum machine_mode mode;
18385 reg = operands[!load];
18386 mem = operands[load];
18388 mode = GET_MODE (reg);
18390 gcc_assert (REG_P (reg));
18391 regno = REGNO (reg);
18392 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18393 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18394 || NEON_REGNO_OK_FOR_QUAD (regno));
18395 gcc_assert (VALID_NEON_DREG_MODE (mode)
18396 || VALID_NEON_QREG_MODE (mode)
18397 || VALID_NEON_STRUCT_MODE (mode));
18398 gcc_assert (MEM_P (mem));
18400 addr = XEXP (mem, 0);
18402 /* Strip off const from addresses like (const (plus (...))). */
18403 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18404 addr = XEXP (addr, 0);
18406 switch (GET_CODE (addr))
18408 case POST_INC:
18409 /* We have to use vldm / vstm for too-large modes. */
18410 if (nregs > 4)
18412 templ = "v%smia%%?\t%%0!, %%h1";
18413 ops[0] = XEXP (addr, 0);
18415 else
18417 templ = "v%s1.64\t%%h1, %%A0";
18418 ops[0] = mem;
18420 ops[1] = reg;
18421 break;
18423 case PRE_DEC:
18424 /* We have to use vldm / vstm in this case, since there is no
18425 pre-decrement form of the vld1 / vst1 instructions. */
18426 templ = "v%smdb%%?\t%%0!, %%h1";
18427 ops[0] = XEXP (addr, 0);
18428 ops[1] = reg;
18429 break;
18431 case POST_MODIFY:
18432 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18433 gcc_unreachable ();
18435 case LABEL_REF:
18436 case PLUS:
18438 int i;
18439 int overlap = -1;
18440 for (i = 0; i < nregs; i++)
18442 /* We're only using DImode here because it's a convenient size. */
18443 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18444 ops[1] = adjust_address (mem, DImode, 8 * i);
18445 if (reg_overlap_mentioned_p (ops[0], mem))
18447 gcc_assert (overlap == -1);
18448 overlap = i;
18450 else
18452 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18453 output_asm_insn (buff, ops);
18456 if (overlap != -1)
18458 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18459 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18460 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18461 output_asm_insn (buff, ops);
18464 return "";
18467 default:
18468 /* We have to use vldm / vstm for too-large modes. */
18469 if (nregs > 4)
18470 templ = "v%smia%%?\t%%m0, %%h1";
18471 else
18472 templ = "v%s1.64\t%%h1, %%A0";
18474 ops[0] = mem;
18475 ops[1] = reg;
18478 sprintf (buff, templ, load ? "ld" : "st");
18479 output_asm_insn (buff, ops);
18481 return "";
18484 /* Compute and return the length of neon_mov<mode>, where <mode> is
18485 one of VSTRUCT modes: EI, OI, CI or XI. */
18487 arm_attr_length_move_neon (rtx insn)
18489 rtx reg, mem, addr;
18490 int load;
18491 enum machine_mode mode;
18493 extract_insn_cached (insn);
18495 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18497 mode = GET_MODE (recog_data.operand[0]);
18498 switch (mode)
18500 case EImode:
18501 case OImode:
18502 return 8;
18503 case CImode:
18504 return 12;
18505 case XImode:
18506 return 16;
18507 default:
18508 gcc_unreachable ();
18512 load = REG_P (recog_data.operand[0]);
18513 reg = recog_data.operand[!load];
18514 mem = recog_data.operand[load];
18516 gcc_assert (MEM_P (mem));
18518 mode = GET_MODE (reg);
18519 addr = XEXP (mem, 0);
18521 /* Strip off const from addresses like (const (plus (...))). */
18522 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18523 addr = XEXP (addr, 0);
18525 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18527 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18528 return insns * 4;
18530 else
18531 return 4;
18534 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18535 return zero. */
18538 arm_address_offset_is_imm (rtx insn)
18540 rtx mem, addr;
18542 extract_insn_cached (insn);
18544 if (REG_P (recog_data.operand[0]))
18545 return 0;
18547 mem = recog_data.operand[0];
18549 gcc_assert (MEM_P (mem));
18551 addr = XEXP (mem, 0);
18553 if (REG_P (addr)
18554 || (GET_CODE (addr) == PLUS
18555 && REG_P (XEXP (addr, 0))
18556 && CONST_INT_P (XEXP (addr, 1))))
18557 return 1;
18558 else
18559 return 0;
18562 /* Output an ADD r, s, #n where n may be too big for one instruction.
18563 If adding zero to one register, output nothing. */
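/* Worked example (hypothetical operands): adding #0x1001 to r1 with r0 as
   the destination cannot be encoded in one immediate, so this emits
   "add r0, r1, #1" followed by "add r0, r0, #4096"; a negative addend uses
   "sub" in the same way.  */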
18564 const char *
18565 output_add_immediate (rtx *operands)
18567 HOST_WIDE_INT n = INTVAL (operands[2]);
18569 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18571 if (n < 0)
18572 output_multi_immediate (operands,
18573 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18574 -n);
18575 else
18576 output_multi_immediate (operands,
18577 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18581 return "";
18584 /* Output a multiple immediate operation.
18585 OPERANDS is the vector of operands referred to in the output patterns.
18586 INSTR1 is the output pattern to use for the first constant.
18587 INSTR2 is the output pattern to use for subsequent constants.
18588 IMMED_OP is the index of the constant slot in OPERANDS.
18589 N is the constant value. */
18590 static const char *
18591 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18592 int immed_op, HOST_WIDE_INT n)
18594 #if HOST_BITS_PER_WIDE_INT > 32
18595 n &= 0xffffffff;
18596 #endif
18598 if (n == 0)
18600 /* Quick and easy output. */
18601 operands[immed_op] = const0_rtx;
18602 output_asm_insn (instr1, operands);
18604 else
18606 int i;
18607 const char * instr = instr1;
18609 /* Note that n is never zero here (which would give no output). */
18610 for (i = 0; i < 32; i += 2)
18612 if (n & (3 << i))
18614 operands[immed_op] = GEN_INT (n & (255 << i));
18615 output_asm_insn (instr, operands);
18616 instr = instr2;
18617 i += 6;
18622 return "";
18625 /* Return the name of a shifter operation. */
18626 static const char *
18627 arm_shift_nmem(enum rtx_code code)
18629 switch (code)
18631 case ASHIFT:
18632 return ARM_LSL_NAME;
18634 case ASHIFTRT:
18635 return "asr";
18637 case LSHIFTRT:
18638 return "lsr";
18640 case ROTATERT:
18641 return "ror";
18643 default:
18644 abort();
18648 /* Return the appropriate ARM instruction for the operation code.
18649 The returned result should not be overwritten. OP is the rtx of the
18650 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18651 was shifted. */
18652 const char *
18653 arithmetic_instr (rtx op, int shift_first_arg)
18655 switch (GET_CODE (op))
18657 case PLUS:
18658 return "add";
18660 case MINUS:
18661 return shift_first_arg ? "rsb" : "sub";
18663 case IOR:
18664 return "orr";
18666 case XOR:
18667 return "eor";
18669 case AND:
18670 return "and";
18672 case ASHIFT:
18673 case ASHIFTRT:
18674 case LSHIFTRT:
18675 case ROTATERT:
18676 return arm_shift_nmem(GET_CODE(op));
18678 default:
18679 gcc_unreachable ();
18683 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18684 for the operation code. The returned result should not be overwritten.
18685 OP is the rtx of the shift.
18686 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
18687 holds the constant shift amount. */
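/* For example: (ashift x (const_int 3)) and (mult x (const_int 8)) both
   yield "lsl" with *AMOUNTP == 3, (ashiftrt x (reg r2)) yields "asr" with
   *AMOUNTP == -1, and a constant shift of zero makes the function return
   NULL so that no shift is printed at all.  */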
18688 static const char *
18689 shift_op (rtx op, HOST_WIDE_INT *amountp)
18691 const char * mnem;
18692 enum rtx_code code = GET_CODE (op);
18694 switch (code)
18696 case ROTATE:
18697 if (!CONST_INT_P (XEXP (op, 1)))
18699 output_operand_lossage ("invalid shift operand");
18700 return NULL;
18703 code = ROTATERT;
18704 *amountp = 32 - INTVAL (XEXP (op, 1));
18705 mnem = "ror";
18706 break;
18708 case ASHIFT:
18709 case ASHIFTRT:
18710 case LSHIFTRT:
18711 case ROTATERT:
18712 mnem = arm_shift_nmem(code);
18713 if (CONST_INT_P (XEXP (op, 1)))
18715 *amountp = INTVAL (XEXP (op, 1));
18717 else if (REG_P (XEXP (op, 1)))
18719 *amountp = -1;
18720 return mnem;
18722 else
18724 output_operand_lossage ("invalid shift operand");
18725 return NULL;
18727 break;
18729 case MULT:
18730 /* We never have to worry about the amount being other than a
18731 power of 2, since this case can never be reloaded from a reg. */
18732 if (!CONST_INT_P (XEXP (op, 1)))
18734 output_operand_lossage ("invalid shift operand");
18735 return NULL;
18738 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18740 /* Amount must be a power of two. */
18741 if (*amountp & (*amountp - 1))
18743 output_operand_lossage ("invalid shift operand");
18744 return NULL;
18747 *amountp = int_log2 (*amountp);
18748 return ARM_LSL_NAME;
18750 default:
18751 output_operand_lossage ("invalid shift operand");
18752 return NULL;
18755 /* This is not 100% correct, but follows from the desire to merge
18756 multiplication by a power of 2 with the recognizer for a
18757 shift. >=32 is not a valid shift for "lsl", so we must try and
18758 output a shift that produces the correct arithmetical result.
18759 Using lsr #32 is identical except for the fact that the carry bit
18760 is not set correctly if we set the flags; but we never use the
18761 carry bit from such an operation, so we can ignore that. */
18762 if (code == ROTATERT)
18763 /* Rotate is just modulo 32. */
18764 *amountp &= 31;
18765 else if (*amountp != (*amountp & 31))
18767 if (code == ASHIFT)
18768 mnem = "lsr";
18769 *amountp = 32;
18772 /* Shifts of 0 are no-ops. */
18773 if (*amountp == 0)
18774 return NULL;
18776 return mnem;
18779 /* Obtain the shift count (log2) from the power of two POWER. */
18781 static HOST_WIDE_INT
18782 int_log2 (HOST_WIDE_INT power)
18784 HOST_WIDE_INT shift = 0;
18786 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18788 gcc_assert (shift <= 31);
18789 shift++;
18792 return shift;
18795 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18796 because /bin/as is horribly restrictive. The judgement about
18797 whether or not each character is 'printable' (and can be output as
18798 is) or not (and must be printed with an octal escape) must be made
18799 with reference to the *host* character set -- the situation is
18800 similar to that discussed in the comments above pp_c_char in
18801 c-pretty-print.c. */
18803 #define MAX_ASCII_LEN 51
18805 void
18806 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18808 int i;
18809 int len_so_far = 0;
18811 fputs ("\t.ascii\t\"", stream);
18813 for (i = 0; i < len; i++)
18815 int c = p[i];
18817 if (len_so_far >= MAX_ASCII_LEN)
18819 fputs ("\"\n\t.ascii\t\"", stream);
18820 len_so_far = 0;
18823 if (ISPRINT (c))
18825 if (c == '\\' || c == '\"')
18827 putc ('\\', stream);
18828 len_so_far++;
18830 putc (c, stream);
18831 len_so_far++;
18833 else
18835 fprintf (stream, "\\%03o", c);
18836 len_so_far += 4;
18840 fputs ("\"\n", stream);
18843 /* Compute the register save mask for registers 0 through 12
18844 inclusive. This code is used by arm_compute_save_reg_mask. */
18846 static unsigned long
18847 arm_compute_save_reg0_reg12_mask (void)
18849 unsigned long func_type = arm_current_func_type ();
18850 unsigned long save_reg_mask = 0;
18851 unsigned int reg;
18853 if (IS_INTERRUPT (func_type))
18855 unsigned int max_reg;
18856 /* Interrupt functions must not corrupt any registers,
18857 even call clobbered ones. If this is a leaf function
18858 we can just examine the registers used by the RTL, but
18859 otherwise we have to assume that whatever function is
18860 called might clobber anything, and so we have to save
18861 all the call-clobbered registers as well. */
18862 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18863 /* FIQ handlers have registers r8 - r12 banked, so
18864 we only need to check r0 - r7. Normal ISRs only
18865 bank r14 and r15, so we must check up to r12.
18866 r13 is the stack pointer which is always preserved,
18867 so we do not need to consider it here. */
18868 max_reg = 7;
18869 else
18870 max_reg = 12;
18872 for (reg = 0; reg <= max_reg; reg++)
18873 if (df_regs_ever_live_p (reg)
18874 || (! crtl->is_leaf && call_used_regs[reg]))
18875 save_reg_mask |= (1 << reg);
18877 /* Also save the pic base register if necessary. */
18878 if (flag_pic
18879 && !TARGET_SINGLE_PIC_BASE
18880 && arm_pic_register != INVALID_REGNUM
18881 && crtl->uses_pic_offset_table)
18882 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18884 else if (IS_VOLATILE(func_type))
18886 /* For noreturn functions we historically omitted register saves
18887 altogether. However this really messes up debugging. As a
18888 compromise save just the frame pointers. Combined with the link
18889 register saved elsewhere this should be sufficient to get
18890 a backtrace. */
18891 if (frame_pointer_needed)
18892 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18893 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18894 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18895 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18896 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18898 else
18900 /* In the normal case we only need to save those registers
18901 which are call saved and which are used by this function. */
18902 for (reg = 0; reg <= 11; reg++)
18903 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18904 save_reg_mask |= (1 << reg);
18906 /* Handle the frame pointer as a special case. */
18907 if (frame_pointer_needed)
18908 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18910 /* If we aren't loading the PIC register,
18911 don't stack it even though it may be live. */
18912 if (flag_pic
18913 && !TARGET_SINGLE_PIC_BASE
18914 && arm_pic_register != INVALID_REGNUM
18915 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18916 || crtl->uses_pic_offset_table))
18917 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18919 /* The prologue will copy SP into R0, so save it. */
18920 if (IS_STACKALIGN (func_type))
18921 save_reg_mask |= 1;
18924 /* Save registers so the exception handler can modify them. */
18925 if (crtl->calls_eh_return)
18927 unsigned int i;
18929 for (i = 0; ; i++)
18931 reg = EH_RETURN_DATA_REGNO (i);
18932 if (reg == INVALID_REGNUM)
18933 break;
18934 save_reg_mask |= 1 << reg;
18938 return save_reg_mask;
18941 /* Return true if r3 is live at the start of the function. */
18943 static bool
18944 arm_r3_live_at_start_p (void)
18946 /* Just look at cfg info, which is still close enough to correct at this
18947 point. This gives false positives for broken functions that might use
18948 uninitialized data that happens to be allocated in r3, but who cares? */
18949 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18952 /* Compute the number of bytes used to store the static chain register on the
18953 stack, above the stack frame. We need to know this accurately to get the
18954 alignment of the rest of the stack frame correct. */
18956 static int
18957 arm_compute_static_chain_stack_bytes (void)
18959 /* See the defining assertion in arm_expand_prologue. */
18960 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18961 && IS_NESTED (arm_current_func_type ())
18962 && arm_r3_live_at_start_p ()
18963 && crtl->args.pretend_args_size == 0)
18964 return 4;
18966 return 0;
18969 /* Compute a bit mask of which registers need to be
18970 saved on the stack for the current function.
18971 This is used by arm_get_frame_offsets, which may add extra registers. */
18973 static unsigned long
18974 arm_compute_save_reg_mask (void)
18976 unsigned int save_reg_mask = 0;
18977 unsigned long func_type = arm_current_func_type ();
18978 unsigned int reg;
18980 if (IS_NAKED (func_type))
18981 /* This should never really happen. */
18982 return 0;
18984 /* If we are creating a stack frame, then we must save the frame pointer,
18985 IP (which will hold the old stack pointer), LR and the PC. */
18986 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18987 save_reg_mask |=
18988 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18989 | (1 << IP_REGNUM)
18990 | (1 << LR_REGNUM)
18991 | (1 << PC_REGNUM);
18993 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18995 /* Decide if we need to save the link register.
18996 Interrupt routines have their own banked link register,
18997 so they never need to save it.
18998 Otherwise if we do not use the link register we do not need to save
18999 it. If we are pushing other registers onto the stack however, we
19000 can save an instruction in the epilogue by pushing the link register
19001 now and then popping it back into the PC. This incurs extra memory
19002 accesses though, so we only do it when optimizing for size, and only
19003 if we know that we will not need a fancy return sequence. */
19004 if (df_regs_ever_live_p (LR_REGNUM)
19005 || (save_reg_mask
19006 && optimize_size
19007 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19008 && !crtl->calls_eh_return))
19009 save_reg_mask |= 1 << LR_REGNUM;
19011 if (cfun->machine->lr_save_eliminated)
19012 save_reg_mask &= ~ (1 << LR_REGNUM);
19014 if (TARGET_REALLY_IWMMXT
19015 && ((bit_count (save_reg_mask)
19016 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19017 arm_compute_static_chain_stack_bytes())
19018 ) % 2) != 0)
19020 /* The total number of registers that are going to be pushed
19021 onto the stack is odd. We need to ensure that the stack
19022 is 64-bit aligned before we start to save iWMMXt registers,
19023 and also before we start to create locals. (A local variable
19024 might be a double or long long which we will load/store using
19025 an iWMMXt instruction). Therefore we need to push another
19026 ARM register, so that the stack will be 64-bit aligned. We
19027 try to avoid using the arg registers (r0 -r3) as they might be
19028 used to pass values in a tail call. */
19029 for (reg = 4; reg <= 12; reg++)
19030 if ((save_reg_mask & (1 << reg)) == 0)
19031 break;
19033 if (reg <= 12)
19034 save_reg_mask |= (1 << reg);
19035 else
19037 cfun->machine->sibcall_blocked = 1;
19038 save_reg_mask |= (1 << 3);
19042 /* We may need to push an additional register for use initializing the
19043 PIC base register. */
19044 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19045 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19047 reg = thumb_find_work_register (1 << 4);
19048 if (!call_used_regs[reg])
19049 save_reg_mask |= (1 << reg);
19052 return save_reg_mask;
19056 /* Compute a bit mask of which registers need to be
19057 saved on the stack for the current function. */
19058 static unsigned long
19059 thumb1_compute_save_reg_mask (void)
19061 unsigned long mask;
19062 unsigned reg;
19064 mask = 0;
19065 for (reg = 0; reg < 12; reg ++)
19066 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19067 mask |= 1 << reg;
19069 if (flag_pic
19070 && !TARGET_SINGLE_PIC_BASE
19071 && arm_pic_register != INVALID_REGNUM
19072 && crtl->uses_pic_offset_table)
19073 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19075 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19076 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19077 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19079 /* LR will also be pushed if any lo regs are pushed. */
19080 if (mask & 0xff || thumb_force_lr_save ())
19081 mask |= (1 << LR_REGNUM);
19083 /* Make sure we have a low work register if we need one.
19084 We will need one if we are going to push a high register,
19085 but we are not currently intending to push a low register. */
19086 if ((mask & 0xff) == 0
19087 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19089 /* Use thumb_find_work_register to choose which register
19090 we will use. If the register is live then we will
19091 have to push it. Use LAST_LO_REGNUM as our fallback
19092 choice for the register to select. */
19093 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19094 /* Make sure the register returned by thumb_find_work_register is
19095 not part of the return value. */
19096 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19097 reg = LAST_LO_REGNUM;
19099 if (! call_used_regs[reg])
19100 mask |= 1 << reg;
19103 /* The 504 below is 8 bytes less than 512 because there are two possible
19104 alignment words. We can't tell here if they will be present or not so we
19105 have to play it safe and assume that they are. */
19106 if ((CALLER_INTERWORKING_SLOT_SIZE +
19107 ROUND_UP_WORD (get_frame_size ()) +
19108 crtl->outgoing_args_size) >= 504)
19110 /* This is the same as the code in thumb1_expand_prologue() which
19111 determines which register to use for stack decrement. */
19112 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19113 if (mask & (1 << reg))
19114 break;
19116 if (reg > LAST_LO_REGNUM)
19118 /* Make sure we have a register available for stack decrement. */
19119 mask |= 1 << LAST_LO_REGNUM;
19123 return mask;
19127 /* Return the number of bytes required to save VFP registers. */
19128 static int
19129 arm_get_vfp_saved_size (void)
19131 unsigned int regno;
19132 int count;
19133 int saved;
19135 saved = 0;
19136 /* Space for saved VFP registers. */
19137 if (TARGET_HARD_FLOAT && TARGET_VFP)
19139 count = 0;
19140 for (regno = FIRST_VFP_REGNUM;
19141 regno < LAST_VFP_REGNUM;
19142 regno += 2)
19144 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19145 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19147 if (count > 0)
19149 /* Work around the ARM10 VFPr1 bug. */
19150 if (count == 2 && !arm_arch6)
19151 count++;
19152 saved += count * 8;
19154 count = 0;
19156 else
19157 count++;
19159 if (count > 0)
19161 if (count == 2 && !arm_arch6)
19162 count++;
19163 saved += count * 8;
19166 return saved;
19170 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19171 everything bar the final return instruction. If SIMPLE_RETURN is true,
19172 then do not output the epilogue, because it has already been emitted in RTL. */
19173 const char *
19174 output_return_instruction (rtx operand, bool really_return, bool reverse,
19175 bool simple_return)
19177 char conditional[10];
19178 char instr[100];
19179 unsigned reg;
19180 unsigned long live_regs_mask;
19181 unsigned long func_type;
19182 arm_stack_offsets *offsets;
19184 func_type = arm_current_func_type ();
19186 if (IS_NAKED (func_type))
19187 return "";
19189 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19191 /* If this function was declared non-returning, and we have
19192 found a tail call, then we have to trust that the called
19193 function won't return. */
19194 if (really_return)
19196 rtx ops[2];
19198 /* Otherwise, trap an attempted return by aborting. */
19199 ops[0] = operand;
19200 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19201 : "abort");
19202 assemble_external_libcall (ops[1]);
19203 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19206 return "";
19209 gcc_assert (!cfun->calls_alloca || really_return);
19211 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19213 cfun->machine->return_used_this_function = 1;
19215 offsets = arm_get_frame_offsets ();
19216 live_regs_mask = offsets->saved_regs_mask;
19218 if (!simple_return && live_regs_mask)
19220 const char * return_reg;
19222 /* If we do not have any special requirements for function exit
19223 (e.g. interworking) then we can load the return address
19224 directly into the PC. Otherwise we must load it into LR. */
19225 if (really_return
19226 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19227 return_reg = reg_names[PC_REGNUM];
19228 else
19229 return_reg = reg_names[LR_REGNUM];
19231 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19233 /* There are three possible reasons for the IP register
19234 being saved. 1) a stack frame was created, in which case
19235 IP contains the old stack pointer, or 2) an ISR routine
19236 corrupted it, or 3) it was saved to align the stack on
19237 iWMMXt. In case 1, restore IP into SP, otherwise just
19238 restore IP. */
19239 if (frame_pointer_needed)
19241 live_regs_mask &= ~ (1 << IP_REGNUM);
19242 live_regs_mask |= (1 << SP_REGNUM);
19244 else
19245 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19248 /* On some ARM architectures it is faster to use LDR rather than
19249 LDM to load a single register. On other architectures, the
19250 cost is the same. In 26 bit mode, or for exception handlers,
19251 we have to use LDM to load the PC so that the CPSR is also
19252 restored. */
19253 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19254 if (live_regs_mask == (1U << reg))
19255 break;
19257 if (reg <= LAST_ARM_REGNUM
19258 && (reg != LR_REGNUM
19259 || ! really_return
19260 || ! IS_INTERRUPT (func_type)))
19262 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19263 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19265 else
19267 char *p;
19268 int first = 1;
19270 /* Generate the load multiple instruction to restore the
19271 registers. Note we can get here, even if
19272 frame_pointer_needed is true, but only if sp already
19273 points to the base of the saved core registers. */
19274 if (live_regs_mask & (1 << SP_REGNUM))
19276 unsigned HOST_WIDE_INT stack_adjust;
19278 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19279 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19281 if (stack_adjust && arm_arch5 && TARGET_ARM)
19282 if (TARGET_UNIFIED_ASM)
19283 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19284 else
19285 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19286 else
19288 /* If we can't use ldmib (SA110 bug),
19289 then try to pop r3 instead. */
19290 if (stack_adjust)
19291 live_regs_mask |= 1 << 3;
19293 if (TARGET_UNIFIED_ASM)
19294 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19295 else
19296 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19299 else
19300 if (TARGET_UNIFIED_ASM)
19301 sprintf (instr, "pop%s\t{", conditional);
19302 else
19303 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19305 p = instr + strlen (instr);
19307 for (reg = 0; reg <= SP_REGNUM; reg++)
19308 if (live_regs_mask & (1 << reg))
19310 int l = strlen (reg_names[reg]);
19312 if (first)
19313 first = 0;
19314 else
19316 memcpy (p, ", ", 2);
19317 p += 2;
19320 memcpy (p, "%|", 2);
19321 memcpy (p + 2, reg_names[reg], l);
19322 p += l + 2;
19325 if (live_regs_mask & (1 << LR_REGNUM))
19327 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19328 /* If returning from an interrupt, restore the CPSR. */
19329 if (IS_INTERRUPT (func_type))
19330 strcat (p, "^");
19332 else
19333 strcpy (p, "}");
19336 output_asm_insn (instr, & operand);
19338 /* See if we need to generate an extra instruction to
19339 perform the actual function return. */
19340 if (really_return
19341 && func_type != ARM_FT_INTERWORKED
19342 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19344 /* The return has already been handled
19345 by loading the LR into the PC. */
19346 return "";
19350 if (really_return)
19352 switch ((int) ARM_FUNC_TYPE (func_type))
19354 case ARM_FT_ISR:
19355 case ARM_FT_FIQ:
19356 /* ??? This is wrong for unified assembly syntax. */
19357 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19358 break;
19360 case ARM_FT_INTERWORKED:
19361 sprintf (instr, "bx%s\t%%|lr", conditional);
19362 break;
19364 case ARM_FT_EXCEPTION:
19365 /* ??? This is wrong for unified assembly syntax. */
19366 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19367 break;
19369 default:
19370 /* Use bx if it's available. */
19371 if (arm_arch5 || arm_arch4t)
19372 sprintf (instr, "bx%s\t%%|lr", conditional);
19373 else
19374 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19375 break;
19378 output_asm_insn (instr, & operand);
19381 return "";
19384 /* Write the function name into the code section, directly preceding
19385 the function prologue.
19387 Code will be output similar to this:
19389 .ascii "arm_poke_function_name", 0
19390 .align
19392 .word 0xff000000 + (t1 - t0)
19393 arm_poke_function_name
19394 mov ip, sp
19395 stmfd sp!, {fp, ip, lr, pc}
19396 sub fp, ip, #4
19398 When performing a stack backtrace, code can inspect the value
19399 of 'pc' stored at 'fp' + 0. If the trace function then looks
19400 at location pc - 12 and the top 8 bits are set, then we know
19401 that there is a function name embedded immediately preceding this
19402 location and that its length is ((pc[-3]) & ~0xff000000).
19404 We assume that pc is declared as a pointer to an unsigned long.
19406 It is of no benefit to output the function name if we are assembling
19407 a leaf function. These function types will not contain a stack
19408 backtrace structure; therefore it is not possible to determine the
19409 function name. */
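/* Worked example (assuming ROUND_UP_WORD rounds to 4 bytes): for the name
   "foo", LENGTH is 4 including the terminating NUL, so this emits those
   4 bytes as .ascii data, a word-boundary .align, and the marker word
   0xff000004.  */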
19410 void
19411 arm_poke_function_name (FILE *stream, const char *name)
19413 unsigned long alignlength;
19414 unsigned long length;
19415 rtx x;
19417 length = strlen (name) + 1;
19418 alignlength = ROUND_UP_WORD (length);
19420 ASM_OUTPUT_ASCII (stream, name, length);
19421 ASM_OUTPUT_ALIGN (stream, 2);
19422 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19423 assemble_aligned_integer (UNITS_PER_WORD, x);
19426 /* Place some comments into the assembler stream
19427 describing the current function. */
19428 static void
19429 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19431 unsigned long func_type;
19433 /* ??? Do we want to print some of the below anyway? */
19434 if (TARGET_THUMB1)
19435 return;
19437 /* Sanity check. */
19438 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19440 func_type = arm_current_func_type ();
19442 switch ((int) ARM_FUNC_TYPE (func_type))
19444 default:
19445 case ARM_FT_NORMAL:
19446 break;
19447 case ARM_FT_INTERWORKED:
19448 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19449 break;
19450 case ARM_FT_ISR:
19451 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19452 break;
19453 case ARM_FT_FIQ:
19454 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19455 break;
19456 case ARM_FT_EXCEPTION:
19457 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19458 break;
19461 if (IS_NAKED (func_type))
19462 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19464 if (IS_VOLATILE (func_type))
19465 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19467 if (IS_NESTED (func_type))
19468 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19469 if (IS_STACKALIGN (func_type))
19470 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19472 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19473 crtl->args.size,
19474 crtl->args.pretend_args_size, frame_size);
19476 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19477 frame_pointer_needed,
19478 cfun->machine->uses_anonymous_args);
19480 if (cfun->machine->lr_save_eliminated)
19481 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19483 if (crtl->calls_eh_return)
19484 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19488 static void
19489 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19490 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19492 arm_stack_offsets *offsets;
19494 if (TARGET_THUMB1)
19496 int regno;
19498 /* Emit any call-via-reg trampolines that are needed for v4t support
19499 of call_reg and call_value_reg type insns. */
19500 for (regno = 0; regno < LR_REGNUM; regno++)
19502 rtx label = cfun->machine->call_via[regno];
19504 if (label != NULL)
19506 switch_to_section (function_section (current_function_decl));
19507 targetm.asm_out.internal_label (asm_out_file, "L",
19508 CODE_LABEL_NUMBER (label));
19509 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19513 /* ??? Probably not safe to set this here, since it assumes that a
19514 function will be emitted as assembly immediately after we generate
19515 RTL for it. This does not happen for inline functions. */
19516 cfun->machine->return_used_this_function = 0;
19518 else /* TARGET_32BIT */
19520 /* We need to take into account any stack-frame rounding. */
19521 offsets = arm_get_frame_offsets ();
19523 gcc_assert (!use_return_insn (FALSE, NULL)
19524 || (cfun->machine->return_used_this_function != 0)
19525 || offsets->saved_regs == offsets->outgoing_args
19526 || frame_pointer_needed);
19530 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19531 STR and STRD. If an even number of registers are being pushed, one
19532 or more STRD patterns are created for each register pair. If an
19533 odd number of registers are pushed, emit an initial STR followed by
19534 as many STRD instructions as are needed. This works best when the
19535 stack is initially 64-bit aligned (the normal case), since it
19536 ensures that each STRD is also 64-bit aligned. */
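/* Sketch (illustrative register set): pushing {r4, r5, r6} first stores r4
   with a writeback of sp by -12, then stores the r5/r6 pair at offsets 4
   and 8, so the result typically assembles to
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   together with the DWARF notes built below.  */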
19537 static void
19538 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19540 int num_regs = 0;
19541 int i;
19542 int regno;
19543 rtx par = NULL_RTX;
19544 rtx dwarf = NULL_RTX;
19545 rtx tmp;
19546 bool first = true;
19548 num_regs = bit_count (saved_regs_mask);
19550 /* Must be at least one register to save, and can't save SP or PC. */
19551 gcc_assert (num_regs > 0 && num_regs <= 14);
19552 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19553 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19555 /* Create sequence for DWARF info. All the frame-related data for
19556 debugging is held in this wrapper. */
19557 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19559 /* Describe the stack adjustment. */
19560 tmp = gen_rtx_SET (VOIDmode,
19561 stack_pointer_rtx,
19562 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19563 RTX_FRAME_RELATED_P (tmp) = 1;
19564 XVECEXP (dwarf, 0, 0) = tmp;
19566 /* Find the first register. */
19567 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19570 i = 0;
19572 /* If there's an odd number of registers to push, start off by
19573 pushing a single register. This ensures that subsequent strd
19574 operations are dword aligned (assuming that SP was originally
19575 64-bit aligned). */
19576 if ((num_regs & 1) != 0)
19578 rtx reg, mem, insn;
19580 reg = gen_rtx_REG (SImode, regno);
19581 if (num_regs == 1)
19582 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19583 stack_pointer_rtx));
19584 else
19585 mem = gen_frame_mem (Pmode,
19586 gen_rtx_PRE_MODIFY
19587 (Pmode, stack_pointer_rtx,
19588 plus_constant (Pmode, stack_pointer_rtx,
19589 -4 * num_regs)));
19591 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19592 RTX_FRAME_RELATED_P (tmp) = 1;
19593 insn = emit_insn (tmp);
19594 RTX_FRAME_RELATED_P (insn) = 1;
19595 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19596 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19597 reg);
19598 RTX_FRAME_RELATED_P (tmp) = 1;
19599 i++;
19600 regno++;
19601 XVECEXP (dwarf, 0, i) = tmp;
19602 first = false;
19605 while (i < num_regs)
19606 if (saved_regs_mask & (1 << regno))
19608 rtx reg1, reg2, mem1, mem2;
19609 rtx tmp0, tmp1, tmp2;
19610 int regno2;
19612 /* Find the register to pair with this one. */
19613 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19614 regno2++)
19617 reg1 = gen_rtx_REG (SImode, regno);
19618 reg2 = gen_rtx_REG (SImode, regno2);
19620 if (first)
19622 rtx insn;
19624 first = false;
19625 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19626 stack_pointer_rtx,
19627 -4 * num_regs));
19628 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19629 stack_pointer_rtx,
19630 -4 * (num_regs - 1)));
19631 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19632 plus_constant (Pmode, stack_pointer_rtx,
19633 -4 * (num_regs)));
19634 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19635 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19636 RTX_FRAME_RELATED_P (tmp0) = 1;
19637 RTX_FRAME_RELATED_P (tmp1) = 1;
19638 RTX_FRAME_RELATED_P (tmp2) = 1;
19639 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19640 XVECEXP (par, 0, 0) = tmp0;
19641 XVECEXP (par, 0, 1) = tmp1;
19642 XVECEXP (par, 0, 2) = tmp2;
19643 insn = emit_insn (par);
19644 RTX_FRAME_RELATED_P (insn) = 1;
19645 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19647 else
19649 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19650 stack_pointer_rtx,
19651 4 * i));
19652 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19653 stack_pointer_rtx,
19654 4 * (i + 1)));
19655 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19656 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19657 RTX_FRAME_RELATED_P (tmp1) = 1;
19658 RTX_FRAME_RELATED_P (tmp2) = 1;
19659 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19660 XVECEXP (par, 0, 0) = tmp1;
19661 XVECEXP (par, 0, 1) = tmp2;
19662 emit_insn (par);
19665 /* Create unwind information. This is an approximation. */
19666 tmp1 = gen_rtx_SET (VOIDmode,
19667 gen_frame_mem (Pmode,
19668 plus_constant (Pmode,
19669 stack_pointer_rtx,
19670 4 * i)),
19671 reg1);
19672 tmp2 = gen_rtx_SET (VOIDmode,
19673 gen_frame_mem (Pmode,
19674 plus_constant (Pmode,
19675 stack_pointer_rtx,
19676 4 * (i + 1))),
19677 reg2);
19679 RTX_FRAME_RELATED_P (tmp1) = 1;
19680 RTX_FRAME_RELATED_P (tmp2) = 1;
19681 XVECEXP (dwarf, 0, i + 1) = tmp1;
19682 XVECEXP (dwarf, 0, i + 2) = tmp2;
19683 i += 2;
19684 regno = regno2 + 1;
19686 else
19687 regno++;
19689 return;
19692 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19693 whenever possible, otherwise it emits single-word stores. The first store
19694 also allocates stack space for all saved registers, using pre-indexed
19695 addressing with writeback. All other stores use offset addressing. If no STRD
19696 can be emitted, this function emits a sequence of single-word stores,
19697 and not an STM as before, because single-word stores provide more
19698 scheduling freedom and can be turned into an STM by peephole optimizations. */
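/* Illustrative example (not the literal compiler output): for saved
   registers {r4, r5, r7} this is expected to correspond to something like
       strd    r4, r5, [sp, #-12]!
       str     r7, [sp, #8]
   i.e. the first store performs the whole stack allocation and the
   remaining stores use plain offsets from the new SP.  */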
19699 static void
19700 arm_emit_strd_push (unsigned long saved_regs_mask)
19702 int num_regs = 0;
19703 int i, j, dwarf_index = 0;
19704 int offset = 0;
19705 rtx dwarf = NULL_RTX;
19706 rtx insn = NULL_RTX;
19707 rtx tmp, mem;
19709 /* TODO: More efficient code could be emitted by changing the
19710 layout, e.g., first push all pairs that can use STRD to keep the
19711 stack aligned, and then push all other registers. */
19712 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19713 if (saved_regs_mask & (1 << i))
19714 num_regs++;
19716 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19717 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19718 gcc_assert (num_regs > 0);
19720 /* Create sequence for DWARF info. */
19721 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19723 /* For dwarf info, we generate explicit stack update. */
19724 tmp = gen_rtx_SET (VOIDmode,
19725 stack_pointer_rtx,
19726 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19727 RTX_FRAME_RELATED_P (tmp) = 1;
19728 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19730 /* Save registers. */
19731 offset = - 4 * num_regs;
19732 j = 0;
19733 while (j <= LAST_ARM_REGNUM)
19734 if (saved_regs_mask & (1 << j))
19736 if ((j % 2 == 0)
19737 && (saved_regs_mask & (1 << (j + 1))))
19739 /* The current register and the next register form a register pair for
19740 which STRD can be generated. */
19741 if (offset < 0)
19743 /* Allocate stack space for all saved registers. */
19744 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19745 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19746 mem = gen_frame_mem (DImode, tmp);
19747 offset = 0;
19749 else if (offset > 0)
19750 mem = gen_frame_mem (DImode,
19751 plus_constant (Pmode,
19752 stack_pointer_rtx,
19753 offset));
19754 else
19755 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19757 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19758 RTX_FRAME_RELATED_P (tmp) = 1;
19759 tmp = emit_insn (tmp);
19761 /* Record the first store insn. */
19762 if (dwarf_index == 1)
19763 insn = tmp;
19765 /* Generate dwarf info. */
19766 mem = gen_frame_mem (SImode,
19767 plus_constant (Pmode,
19768 stack_pointer_rtx,
19769 offset));
19770 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19771 RTX_FRAME_RELATED_P (tmp) = 1;
19772 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19774 mem = gen_frame_mem (SImode,
19775 plus_constant (Pmode,
19776 stack_pointer_rtx,
19777 offset + 4));
19778 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19779 RTX_FRAME_RELATED_P (tmp) = 1;
19780 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19782 offset += 8;
19783 j += 2;
19785 else
19787 /* Emit a single word store. */
19788 if (offset < 0)
19790 /* Allocate stack space for all saved registers. */
19791 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19792 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19793 mem = gen_frame_mem (SImode, tmp);
19794 offset = 0;
19796 else if (offset > 0)
19797 mem = gen_frame_mem (SImode,
19798 plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 offset));
19801 else
19802 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19804 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19805 RTX_FRAME_RELATED_P (tmp) = 1;
19806 tmp = emit_insn (tmp);
19808 /* Record the first store insn. */
19809 if (dwarf_index == 1)
19810 insn = tmp;
19812 /* Generate dwarf info. */
19813 mem = gen_frame_mem (SImode,
19814 plus_constant(Pmode,
19815 stack_pointer_rtx,
19816 offset));
19817 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19818 RTX_FRAME_RELATED_P (tmp) = 1;
19819 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19821 offset += 4;
19822 j += 1;
19825 else
19826 j++;
19828 /* Attach dwarf info to the first insn we generate. */
19829 gcc_assert (insn != NULL_RTX);
19830 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19831 RTX_FRAME_RELATED_P (insn) = 1;
19834 /* Generate and emit an insn that we will recognize as a push_multi.
19835 Unfortunately, since this insn does not reflect very well the actual
19836 semantics of the operation, we need to annotate the insn for the benefit
19837 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19838 MASK for registers that should be annotated for DWARF2 frame unwind
19839 information. */
19840 static rtx
19841 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19843 int num_regs = 0;
19844 int num_dwarf_regs = 0;
19845 int i, j;
19846 rtx par;
19847 rtx dwarf;
19848 int dwarf_par_index;
19849 rtx tmp, reg;
19851 /* We don't record the PC in the dwarf frame information. */
19852 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19854 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19856 if (mask & (1 << i))
19857 num_regs++;
19858 if (dwarf_regs_mask & (1 << i))
19859 num_dwarf_regs++;
19862 gcc_assert (num_regs && num_regs <= 16);
19863 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19865 /* For the body of the insn we are going to generate an UNSPEC in
19866 parallel with several USEs. This allows the insn to be recognized
19867 by the push_multi pattern in the arm.md file.
19869 The body of the insn looks something like this:
19871 (parallel [
19872 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19873 (const_int:SI <num>)))
19874 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19875 (use (reg:SI XX))
19876 (use (reg:SI YY))
19880 For the frame note however, we try to be more explicit and actually
19881 show each register being stored into the stack frame, plus a (single)
19882 decrement of the stack pointer. We do it this way in order to be
19883 friendly to the stack unwinding code, which only wants to see a single
19884 stack decrement per instruction. The RTL we generate for the note looks
19885 something like this:
19887 (sequence [
19888 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19889 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19890 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19891 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19895 FIXME: In an ideal world the PRE_MODIFY would not exist and
19896 instead we'd have a parallel expression detailing all
19897 the stores to the various memory addresses so that debug
19898 information is more up-to-date. Remember however while writing
19899 this to take care of the constraints with the push instruction.
19901 Note also that this has to be taken care of for the VFP registers.
19903 For more see PR43399. */
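/* As a rough illustration, for MASK covering {r4, r5, lr} the insn built
   below is the one the push_multi pattern prints as a single
   store-multiple, i.e. something like "push {r4, r5, lr}" (or the
   equivalent "stmfd sp!, {r4, r5, lr}").  */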
19905 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19906 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19907 dwarf_par_index = 1;
19909 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19911 if (mask & (1 << i))
19913 reg = gen_rtx_REG (SImode, i);
19915 XVECEXP (par, 0, 0)
19916 = gen_rtx_SET (VOIDmode,
19917 gen_frame_mem
19918 (BLKmode,
19919 gen_rtx_PRE_MODIFY (Pmode,
19920 stack_pointer_rtx,
19921 plus_constant
19922 (Pmode, stack_pointer_rtx,
19923 -4 * num_regs))
19925 gen_rtx_UNSPEC (BLKmode,
19926 gen_rtvec (1, reg),
19927 UNSPEC_PUSH_MULT));
19929 if (dwarf_regs_mask & (1 << i))
19931 tmp = gen_rtx_SET (VOIDmode,
19932 gen_frame_mem (SImode, stack_pointer_rtx),
19933 reg);
19934 RTX_FRAME_RELATED_P (tmp) = 1;
19935 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19938 break;
19942 for (j = 1, i++; j < num_regs; i++)
19944 if (mask & (1 << i))
19946 reg = gen_rtx_REG (SImode, i);
19948 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19950 if (dwarf_regs_mask & (1 << i))
19953 = gen_rtx_SET (VOIDmode,
19954 gen_frame_mem
19955 (SImode,
19956 plus_constant (Pmode, stack_pointer_rtx,
19957 4 * j)),
19958 reg);
19959 RTX_FRAME_RELATED_P (tmp) = 1;
19960 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19963 j++;
19967 par = emit_insn (par);
19969 tmp = gen_rtx_SET (VOIDmode,
19970 stack_pointer_rtx,
19971 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19972 RTX_FRAME_RELATED_P (tmp) = 1;
19973 XVECEXP (dwarf, 0, 0) = tmp;
19975 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19977 return par;
19980 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19981 SIZE is the offset to be adjusted.
19982 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19983 static void
19984 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19986 rtx dwarf;
19988 RTX_FRAME_RELATED_P (insn) = 1;
19989 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19990 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19993 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19994 SAVED_REGS_MASK shows which registers need to be restored.
19996 Unfortunately, since this insn does not reflect very well the actual
19997 semantics of the operation, we need to annotate the insn for the benefit
19998 of DWARF2 frame unwind information. */
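/* Illustrative example: for SAVED_REGS_MASK covering {r4, r5, lr} the
   parallel built below is printed as a single load-multiple, roughly
   "pop {r4, r5, lr}" (or "ldmfd sp!, {r4, r5, lr}"); if PC is in the mask
   the same insn also acts as the function return, e.g. "pop {r4, r5, pc}".  */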
19999 static void
20000 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20002 int num_regs = 0;
20003 int i, j;
20004 rtx par;
20005 rtx dwarf = NULL_RTX;
20006 rtx tmp, reg;
20007 bool return_in_pc;
20008 int offset_adj;
20009 int emit_update;
20011 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20012 offset_adj = return_in_pc ? 1 : 0;
20013 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20014 if (saved_regs_mask & (1 << i))
20015 num_regs++;
20017 gcc_assert (num_regs && num_regs <= 16);
20019 /* If SP is in reglist, then we don't emit SP update insn. */
20020 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20022 /* The parallel needs to hold num_regs SETs
20023 and one SET for the stack update. */
20024 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20026 if (return_in_pc)
20028 tmp = ret_rtx;
20029 XVECEXP (par, 0, 0) = tmp;
20032 if (emit_update)
20034 /* Increment the stack pointer, based on there being
20035 num_regs 4-byte registers to restore. */
20036 tmp = gen_rtx_SET (VOIDmode,
20037 stack_pointer_rtx,
20038 plus_constant (Pmode,
20039 stack_pointer_rtx,
20040 4 * num_regs));
20041 RTX_FRAME_RELATED_P (tmp) = 1;
20042 XVECEXP (par, 0, offset_adj) = tmp;
20045 /* Now restore every reg, which may include PC. */
20046 for (j = 0, i = 0; j < num_regs; i++)
20047 if (saved_regs_mask & (1 << i))
20049 reg = gen_rtx_REG (SImode, i);
20050 if ((num_regs == 1) && emit_update && !return_in_pc)
20052 /* Emit single load with writeback. */
20053 tmp = gen_frame_mem (SImode,
20054 gen_rtx_POST_INC (Pmode,
20055 stack_pointer_rtx));
20056 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20057 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20058 return;
20061 tmp = gen_rtx_SET (VOIDmode,
20062 reg,
20063 gen_frame_mem
20064 (SImode,
20065 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20066 RTX_FRAME_RELATED_P (tmp) = 1;
20067 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20069 /* We need to maintain a sequence for DWARF info too. As dwarf info
20070 should not have PC, skip PC. */
20071 if (i != PC_REGNUM)
20072 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20074 j++;
20077 if (return_in_pc)
20078 par = emit_jump_insn (par);
20079 else
20080 par = emit_insn (par);
20082 REG_NOTES (par) = dwarf;
20083 if (!return_in_pc)
20084 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20085 stack_pointer_rtx, stack_pointer_rtx);
20088 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20089 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20091 Unfortunately, since this insn does not reflect very well the actual
20092 semantics of the operation, we need to annotate the insn for the benefit
20093 of DWARF2 frame unwind information. */
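/* Illustrative example: restoring two D-registers, say d8 and d9, with
   BASE_REG being the stack pointer is expected to print as something like
   "vldm sp!, {d8-d9}"; the exact mnemonic comes from whichever pattern
   matches the parallel built below.  */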
20094 static void
20095 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20097 int i, j;
20098 rtx par;
20099 rtx dwarf = NULL_RTX;
20100 rtx tmp, reg;
20102 gcc_assert (num_regs && num_regs <= 32);
20104 /* Workaround ARM10 VFPr1 bug. */
20105 if (num_regs == 2 && !arm_arch6)
20107 if (first_reg == 15)
20108 first_reg--;
20110 num_regs++;
20113 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20114 there could be up to 32 D-registers to restore.
20115 If there are more than 16 D-registers, make two recursive calls,
20116 each of which emits one pop_multi instruction. */
20117 if (num_regs > 16)
20119 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20120 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20121 return;
20124 /* The parallel needs to hold num_regs SETs
20125 and one SET for the stack update. */
20126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20128 /* Increment the stack pointer, based on there being
20129 num_regs 8-byte registers to restore. */
20130 tmp = gen_rtx_SET (VOIDmode,
20131 base_reg,
20132 plus_constant (Pmode, base_reg, 8 * num_regs));
20133 RTX_FRAME_RELATED_P (tmp) = 1;
20134 XVECEXP (par, 0, 0) = tmp;
20136 /* Now show every reg that will be restored, using a SET for each. */
20137 for (j = 0, i=first_reg; j < num_regs; i += 2)
20139 reg = gen_rtx_REG (DFmode, i);
20141 tmp = gen_rtx_SET (VOIDmode,
20142 reg,
20143 gen_frame_mem
20144 (DFmode,
20145 plus_constant (Pmode, base_reg, 8 * j)));
20146 RTX_FRAME_RELATED_P (tmp) = 1;
20147 XVECEXP (par, 0, j + 1) = tmp;
20149 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20151 j++;
20154 par = emit_insn (par);
20155 REG_NOTES (par) = dwarf;
20157 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20158 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20160 RTX_FRAME_RELATED_P (par) = 1;
20161 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20163 else
20164 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20165 base_reg, base_reg);
20168 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20169 even number of registers is being popped, multiple LDRD patterns are created,
20170 one for each register pair. If an odd number of registers is popped, the last
20171 register is loaded using an LDR pattern. */
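/* Illustrative example: for saved registers {r4, r5, r6} with no return in
   PC, the expected sequence is roughly
       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       ldr     r6, [sp], #4
   and when the function returns through PC the final load becomes
   something like "ldr pc, [sp], #4".  */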
20172 static void
20173 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20175 int num_regs = 0;
20176 int i, j;
20177 rtx par = NULL_RTX;
20178 rtx dwarf = NULL_RTX;
20179 rtx tmp, reg, tmp1;
20180 bool return_in_pc;
20182 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20183 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (saved_regs_mask & (1 << i))
20185 num_regs++;
20187 gcc_assert (num_regs && num_regs <= 16);
20189 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20190 to be popped. So, if num_regs is even, now it will become odd,
20191 and we can generate pop with PC. If num_regs is odd, it will be
20192 even now, and ldr with return can be generated for PC. */
20193 if (return_in_pc)
20194 num_regs--;
20196 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20198 /* Var j iterates over all the registers to gather all the registers in
20199 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20200 A PARALLEL RTX of a register pair is created here, so that the pattern for
20201 LDRD can be matched. As PC is always the last register to be popped, and
20202 we have already decremented num_regs if PC is to be popped, we don't have
20203 to worry about PC in this loop. */
20204 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20205 if (saved_regs_mask & (1 << j))
20207 /* Create RTX for memory load. */
20208 reg = gen_rtx_REG (SImode, j);
20209 tmp = gen_rtx_SET (SImode,
20210 reg,
20211 gen_frame_mem (SImode,
20212 plus_constant (Pmode,
20213 stack_pointer_rtx, 4 * i)));
20214 RTX_FRAME_RELATED_P (tmp) = 1;
20216 if (i % 2 == 0)
20218 /* When saved-register index (i) is even, the RTX to be emitted is
20219 yet to be created. Hence create it first. The LDRD pattern we
20220 are generating is :
20221 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20222 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20223 where target registers need not be consecutive. */
20224 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20225 dwarf = NULL_RTX;
20228 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20229 added as 0th element and if i is odd, reg_i is added as 1st element
20230 of LDRD pattern shown above. */
20231 XVECEXP (par, 0, (i % 2)) = tmp;
20232 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20234 if ((i % 2) == 1)
20236 /* When saved-register index (i) is odd, RTXs for both the registers
20237 to be loaded are generated in above given LDRD pattern, and the
20238 pattern can be emitted now. */
20239 par = emit_insn (par);
20240 REG_NOTES (par) = dwarf;
20241 RTX_FRAME_RELATED_P (par) = 1;
20244 i++;
20247 /* If the number of registers is odd AND return_in_pc is false, OR the
20248 number of registers is even AND return_in_pc is true, the last register is
20249 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20250 then use LDR with post-increment. */
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp = gen_rtx_SET (VOIDmode,
20255 stack_pointer_rtx,
20256 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20257 RTX_FRAME_RELATED_P (tmp) = 1;
20258 tmp = emit_insn (tmp);
20259 if (!return_in_pc)
20261 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20262 stack_pointer_rtx, stack_pointer_rtx);
20265 dwarf = NULL_RTX;
20267 if (((num_regs % 2) == 1 && !return_in_pc)
20268 || ((num_regs % 2) == 0 && return_in_pc))
20270 /* Scan for the single register to be popped. Skip until the saved
20271 register is found. */
20272 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20274 /* Gen LDR with post increment here. */
20275 tmp1 = gen_rtx_MEM (SImode,
20276 gen_rtx_POST_INC (SImode,
20277 stack_pointer_rtx));
20278 set_mem_alias_set (tmp1, get_frame_alias_set ());
20280 reg = gen_rtx_REG (SImode, j);
20281 tmp = gen_rtx_SET (SImode, reg, tmp1);
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20285 if (return_in_pc)
20287 /* If return_in_pc, j must be PC_REGNUM. */
20288 gcc_assert (j == PC_REGNUM);
20289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20290 XVECEXP (par, 0, 0) = ret_rtx;
20291 XVECEXP (par, 0, 1) = tmp;
20292 par = emit_jump_insn (par);
20294 else
20296 par = emit_insn (tmp);
20297 REG_NOTES (par) = dwarf;
20298 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20299 stack_pointer_rtx, stack_pointer_rtx);
20303 else if ((num_regs % 2) == 1 && return_in_pc)
20305 /* There are 2 registers to be popped. So, generate the pattern
20306 pop_multiple_with_stack_update_and_return to pop into PC. */
20307 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20310 return;
20313 /* LDRD in ARM mode needs consecutive registers as operands. This function
20314 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20315 offset addressing and then generates one separate stack update. This provides
20316 more scheduling freedom, compared to writeback on every load. However,
20317 if the function returns using load into PC directly
20318 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20319 before the last load. TODO: Add a peephole optimization to recognize
20320 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20321 peephole optimization to merge the load at stack-offset zero
20322 with the stack update instruction using load with writeback
20323 in post-index addressing mode. */
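/* Illustrative example: for saved registers {r4, r5, r6} with no load into
   PC this is expected to correspond to roughly
       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12
   with a final "ldr pc, [sp], #4" appended when PC is in SAVED_REGS_MASK.  */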
20324 static void
20325 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20327 int j = 0;
20328 int offset = 0;
20329 rtx par = NULL_RTX;
20330 rtx dwarf = NULL_RTX;
20331 rtx tmp, mem;
20333 /* Restore saved registers. */
20334 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20335 j = 0;
20336 while (j <= LAST_ARM_REGNUM)
20337 if (saved_regs_mask & (1 << j))
20339 if ((j % 2) == 0
20340 && (saved_regs_mask & (1 << (j + 1)))
20341 && (j + 1) != PC_REGNUM)
20343 /* The current register and the next register form a register pair for which
20344 LDRD can be generated. PC is always the last register popped, and
20345 we handle it separately. */
20346 if (offset > 0)
20347 mem = gen_frame_mem (DImode,
20348 plus_constant (Pmode,
20349 stack_pointer_rtx,
20350 offset));
20351 else
20352 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20354 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20355 tmp = emit_insn (tmp);
20356 RTX_FRAME_RELATED_P (tmp) = 1;
20358 /* Generate dwarf info. */
20360 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20361 gen_rtx_REG (SImode, j),
20362 NULL_RTX);
20363 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20364 gen_rtx_REG (SImode, j + 1),
20365 dwarf);
20367 REG_NOTES (tmp) = dwarf;
20369 offset += 8;
20370 j += 2;
20372 else if (j != PC_REGNUM)
20374 /* Emit a single word load. */
20375 if (offset > 0)
20376 mem = gen_frame_mem (SImode,
20377 plus_constant (Pmode,
20378 stack_pointer_rtx,
20379 offset));
20380 else
20381 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20383 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20384 tmp = emit_insn (tmp);
20385 RTX_FRAME_RELATED_P (tmp) = 1;
20387 /* Generate dwarf info. */
20388 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20389 gen_rtx_REG (SImode, j),
20390 NULL_RTX);
20392 offset += 4;
20393 j += 1;
20395 else /* j == PC_REGNUM */
20396 j++;
20398 else
20399 j++;
20401 /* Update the stack. */
20402 if (offset > 0)
20404 tmp = gen_rtx_SET (Pmode,
20405 stack_pointer_rtx,
20406 plus_constant (Pmode,
20407 stack_pointer_rtx,
20408 offset));
20409 tmp = emit_insn (tmp);
20410 arm_add_cfa_adjust_cfa_note (tmp, offset,
20411 stack_pointer_rtx, stack_pointer_rtx);
20412 offset = 0;
20415 if (saved_regs_mask & (1 << PC_REGNUM))
20417 /* Only PC is to be popped. */
20418 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20419 XVECEXP (par, 0, 0) = ret_rtx;
20420 tmp = gen_rtx_SET (SImode,
20421 gen_rtx_REG (SImode, PC_REGNUM),
20422 gen_frame_mem (SImode,
20423 gen_rtx_POST_INC (SImode,
20424 stack_pointer_rtx)));
20425 RTX_FRAME_RELATED_P (tmp) = 1;
20426 XVECEXP (par, 0, 1) = tmp;
20427 par = emit_jump_insn (par);
20429 /* Generate dwarf info. */
20430 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20431 gen_rtx_REG (SImode, PC_REGNUM),
20432 NULL_RTX);
20433 REG_NOTES (par) = dwarf;
20434 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20435 stack_pointer_rtx, stack_pointer_rtx);
20439 /* Calculate the size of the return value that is passed in registers. */
20440 static unsigned
20441 arm_size_return_regs (void)
20443 enum machine_mode mode;
20445 if (crtl->return_rtx != 0)
20446 mode = GET_MODE (crtl->return_rtx);
20447 else
20448 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20450 return GET_MODE_SIZE (mode);
20453 /* Return true if the current function needs to save/restore LR. */
20454 static bool
20455 thumb_force_lr_save (void)
20457 return !cfun->machine->lr_save_eliminated
20458 && (!leaf_function_p ()
20459 || thumb_far_jump_used_p ()
20460 || df_regs_ever_live_p (LR_REGNUM));
20463 /* We do not know whether r3 will be available when
20464 an indirect tail call happens in this
20465 particular case. */
20466 static bool
20467 is_indirect_tailcall_p (rtx call)
20469 rtx pat = PATTERN (call);
20471 /* Indirect tail call. */
20472 pat = XVECEXP (pat, 0, 0);
20473 if (GET_CODE (pat) == SET)
20474 pat = SET_SRC (pat);
20476 pat = XEXP (XEXP (pat, 0), 0);
20477 return REG_P (pat);
20480 /* Return true if r3 is used by any of the tail call insns in the
20481 current function. */
20482 static bool
20483 any_sibcall_could_use_r3 (void)
20485 edge_iterator ei;
20486 edge e;
20488 if (!crtl->tail_call_emit)
20489 return false;
20490 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20491 if (e->flags & EDGE_SIBCALL)
20493 rtx call = BB_END (e->src);
20494 if (!CALL_P (call))
20495 call = prev_nonnote_nondebug_insn (call);
20496 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20497 if (find_regno_fusage (call, USE, 3)
20498 || is_indirect_tailcall_p (call))
20499 return true;
20501 return false;
20505 /* Compute the distance from register FROM to register TO.
20506 These can be the arg pointer (26), the soft frame pointer (25),
20507 the stack pointer (13) or the hard frame pointer (11).
20508 In thumb mode r7 is used as the soft frame pointer, if needed.
20509 Typical stack layout looks like this:
20511 old stack pointer -> | |
20512 ----
20513 | | \
20514 | | saved arguments for
20515 | | vararg functions
20516 | | /
20518 hard FP & arg pointer -> | | \
20519 | | stack
20520 | | frame
20521 | | /
20523 | | \
20524 | | call saved
20525 | | registers
20526 soft frame pointer -> | | /
20528 | | \
20529 | | local
20530 | | variables
20531 locals base pointer -> | | /
20533 | | \
20534 | | outgoing
20535 | | arguments
20536 current stack pointer -> | | /
20539 For a given function some or all of these stack components
20540 may not be needed, giving rise to the possibility of
20541 eliminating some of the registers.
20543 The values returned by this function must reflect the behavior
20544 of arm_expand_prologue() and arm_compute_save_reg_mask().
20546 The sign of the number returned reflects the direction of stack
20547 growth, so the values are positive for all eliminations except
20548 from the soft frame pointer to the hard frame pointer.
20550 SFP may point just inside the local variables block to ensure correct
20551 alignment. */
20554 /* Calculate stack offsets. These are used to calculate register elimination
20555 offsets and in prologue/epilogue code. Also calculates which registers
20556 should be saved. */
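/* As a rough guide (not asserted anywhere), the computed offsets end up
   monotonically ordered:
     saved_args <= frame <= saved_regs <= soft_frame
                <= locals_base <= outgoing_args
   which roughly mirrors the stack layout diagram above: the arg pointer
   corresponds to saved_args, the soft frame pointer to soft_frame, the
   locals base to locals_base and the final stack pointer to outgoing_args.  */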
20558 static arm_stack_offsets *
20559 arm_get_frame_offsets (void)
20561 struct arm_stack_offsets *offsets;
20562 unsigned long func_type;
20563 int leaf;
20564 int saved;
20565 int core_saved;
20566 HOST_WIDE_INT frame_size;
20567 int i;
20569 offsets = &cfun->machine->stack_offsets;
20571 /* We need to know if we are a leaf function. Unfortunately, it
20572 is possible to be called after start_sequence has been called,
20573 which causes get_insns to return the insns for the sequence,
20574 not the function, which will cause leaf_function_p to return
20575 the incorrect result.
20577 Fortunately, we only need to know about leaf functions once reload has completed, and the
20578 frame size cannot be changed after that time, so we can safely
20579 use the cached value. */
20581 if (reload_completed)
20582 return offsets;
20584 /* Initially this is the size of the local variables. It will be translated
20585 into an offset once we have determined the size of preceding data. */
20586 frame_size = ROUND_UP_WORD (get_frame_size ());
20588 leaf = leaf_function_p ();
20590 /* Space for variadic functions. */
20591 offsets->saved_args = crtl->args.pretend_args_size;
20593 /* In Thumb mode this is incorrect, but never used. */
20594 offsets->frame
20595 = (offsets->saved_args
20596 + arm_compute_static_chain_stack_bytes ()
20597 + (frame_pointer_needed ? 4 : 0));
20599 if (TARGET_32BIT)
20601 unsigned int regno;
20603 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20604 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20605 saved = core_saved;
20607 /* We know that SP will be doubleword aligned on entry, and we must
20608 preserve that condition at any subroutine call. We also require the
20609 soft frame pointer to be doubleword aligned. */
20611 if (TARGET_REALLY_IWMMXT)
20613 /* Check for the call-saved iWMMXt registers. */
20614 for (regno = FIRST_IWMMXT_REGNUM;
20615 regno <= LAST_IWMMXT_REGNUM;
20616 regno++)
20617 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20618 saved += 8;
20621 func_type = arm_current_func_type ();
20622 /* Space for saved VFP registers. */
20623 if (! IS_VOLATILE (func_type)
20624 && TARGET_HARD_FLOAT && TARGET_VFP)
20625 saved += arm_get_vfp_saved_size ();
20627 else /* TARGET_THUMB1 */
20629 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20630 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20631 saved = core_saved;
20632 if (TARGET_BACKTRACE)
20633 saved += 16;
20636 /* Saved registers include the stack frame. */
20637 offsets->saved_regs
20638 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20639 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20641 /* A leaf function does not need any stack alignment if it has nothing
20642 on the stack. */
20643 if (leaf && frame_size == 0
20644 /* However if it calls alloca(), we have a dynamically allocated
20645 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20646 && ! cfun->calls_alloca)
20648 offsets->outgoing_args = offsets->soft_frame;
20649 offsets->locals_base = offsets->soft_frame;
20650 return offsets;
20653 /* Ensure SFP has the correct alignment. */
20654 if (ARM_DOUBLEWORD_ALIGN
20655 && (offsets->soft_frame & 7))
20657 offsets->soft_frame += 4;
20658 /* Try to align stack by pushing an extra reg. Don't bother doing this
20659 when there is a stack frame as the alignment will be rolled into
20660 the normal stack adjustment. */
20661 if (frame_size + crtl->outgoing_args_size == 0)
20663 int reg = -1;
20665 /* If it is safe to use r3, then do so. This sometimes
20666 generates better code on Thumb-2 by avoiding the need to
20667 use 32-bit push/pop instructions. */
20668 if (! any_sibcall_could_use_r3 ()
20669 && arm_size_return_regs () <= 12
20670 && (offsets->saved_regs_mask & (1 << 3)) == 0
20671 && (TARGET_THUMB2
20672 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20674 reg = 3;
20676 else
20677 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20679 /* Avoid fixed registers; they may be changed at
20680 arbitrary times so it's unsafe to restore them
20681 during the epilogue. */
20682 if (!fixed_regs[i]
20683 && (offsets->saved_regs_mask & (1 << i)) == 0)
20685 reg = i;
20686 break;
20690 if (reg != -1)
20692 offsets->saved_regs += 4;
20693 offsets->saved_regs_mask |= (1 << reg);
20698 offsets->locals_base = offsets->soft_frame + frame_size;
20699 offsets->outgoing_args = (offsets->locals_base
20700 + crtl->outgoing_args_size);
20702 if (ARM_DOUBLEWORD_ALIGN)
20704 /* Ensure SP remains doubleword aligned. */
20705 if (offsets->outgoing_args & 7)
20706 offsets->outgoing_args += 4;
20707 gcc_assert (!(offsets->outgoing_args & 7));
20710 return offsets;
20714 /* Calculate the relative offsets for the different stack pointers. Positive
20715 offsets are in the direction of stack growth. */
20717 HOST_WIDE_INT
20718 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20720 arm_stack_offsets *offsets;
20722 offsets = arm_get_frame_offsets ();
20724 /* OK, now we have enough information to compute the distances.
20725 There must be an entry in these switch tables for each pair
20726 of registers in ELIMINABLE_REGS, even if some of the entries
20727 seem to be redundant or useless. */
20728 switch (from)
20730 case ARG_POINTER_REGNUM:
20731 switch (to)
20733 case THUMB_HARD_FRAME_POINTER_REGNUM:
20734 return 0;
20736 case FRAME_POINTER_REGNUM:
20737 /* This is the reverse of the soft frame pointer
20738 to hard frame pointer elimination below. */
20739 return offsets->soft_frame - offsets->saved_args;
20741 case ARM_HARD_FRAME_POINTER_REGNUM:
20742 /* This is only non-zero in the case where the static chain register
20743 is stored above the frame. */
20744 return offsets->frame - offsets->saved_args - 4;
20746 case STACK_POINTER_REGNUM:
20747 /* If nothing has been pushed on the stack at all
20748 then this will return -4. This *is* correct! */
20749 return offsets->outgoing_args - (offsets->saved_args + 4);
20751 default:
20752 gcc_unreachable ();
20754 gcc_unreachable ();
20756 case FRAME_POINTER_REGNUM:
20757 switch (to)
20759 case THUMB_HARD_FRAME_POINTER_REGNUM:
20760 return 0;
20762 case ARM_HARD_FRAME_POINTER_REGNUM:
20763 /* The hard frame pointer points to the top entry in the
20764 stack frame. The soft frame pointer to the bottom entry
20765 in the stack frame. If there is no stack frame at all,
20766 then they are identical. */
20768 return offsets->frame - offsets->soft_frame;
20770 case STACK_POINTER_REGNUM:
20771 return offsets->outgoing_args - offsets->soft_frame;
20773 default:
20774 gcc_unreachable ();
20776 gcc_unreachable ();
20778 default:
20779 /* You cannot eliminate from the stack pointer.
20780 In theory you could eliminate from the hard frame
20781 pointer to the stack pointer, but this will never
20782 happen, since if a stack frame is not needed the
20783 hard frame pointer will never be used. */
20784 gcc_unreachable ();
20788 /* Given FROM and TO register numbers, say whether this elimination is
20789 allowed. Frame pointer elimination is automatically handled.
20791 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20792 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20793 pointer, we must eliminate FRAME_POINTER_REGNUM into
20794 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20795 ARG_POINTER_REGNUM. */
20797 bool
20798 arm_can_eliminate (const int from, const int to)
20800 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20801 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20802 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20803 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20804 true);
20807 /* Emit RTL to save coprocessor registers on function entry. Returns the
20808 number of bytes pushed. */
20810 static int
20811 arm_save_coproc_regs(void)
20813 int saved_size = 0;
20814 unsigned reg;
20815 unsigned start_reg;
20816 rtx insn;
20818 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20819 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20821 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20822 insn = gen_rtx_MEM (V2SImode, insn);
20823 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20824 RTX_FRAME_RELATED_P (insn) = 1;
20825 saved_size += 8;
20828 if (TARGET_HARD_FLOAT && TARGET_VFP)
20830 start_reg = FIRST_VFP_REGNUM;
20832 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20834 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20835 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20837 if (start_reg != reg)
20838 saved_size += vfp_emit_fstmd (start_reg,
20839 (reg - start_reg) / 2);
20840 start_reg = reg + 2;
20843 if (start_reg != reg)
20844 saved_size += vfp_emit_fstmd (start_reg,
20845 (reg - start_reg) / 2);
20847 return saved_size;
20851 /* Set the Thumb frame pointer from the stack pointer. */
20853 static void
20854 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20856 HOST_WIDE_INT amount;
20857 rtx insn, dwarf;
20859 amount = offsets->outgoing_args - offsets->locals_base;
20860 if (amount < 1024)
20861 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20862 stack_pointer_rtx, GEN_INT (amount)));
20863 else
20865 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20866 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20867 expects the first two operands to be the same. */
20868 if (TARGET_THUMB2)
20870 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20871 stack_pointer_rtx,
20872 hard_frame_pointer_rtx));
20874 else
20876 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20877 hard_frame_pointer_rtx,
20878 stack_pointer_rtx));
20880 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20881 plus_constant (Pmode, stack_pointer_rtx, amount));
20882 RTX_FRAME_RELATED_P (dwarf) = 1;
20883 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20886 RTX_FRAME_RELATED_P (insn) = 1;
20889 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20890 function. */
20891 void
20892 arm_expand_prologue (void)
20894 rtx amount;
20895 rtx insn;
20896 rtx ip_rtx;
20897 unsigned long live_regs_mask;
20898 unsigned long func_type;
20899 int fp_offset = 0;
20900 int saved_pretend_args = 0;
20901 int saved_regs = 0;
20902 unsigned HOST_WIDE_INT args_to_push;
20903 arm_stack_offsets *offsets;
20905 func_type = arm_current_func_type ();
20907 /* Naked functions don't have prologues. */
20908 if (IS_NAKED (func_type))
20909 return;
20911 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20912 args_to_push = crtl->args.pretend_args_size;
20914 /* Compute which registers we will have to save onto the stack. */
20915 offsets = arm_get_frame_offsets ();
20916 live_regs_mask = offsets->saved_regs_mask;
20918 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20920 if (IS_STACKALIGN (func_type))
20922 rtx r0, r1;
20924 /* Handle a word-aligned stack pointer. We generate the following:
20926 mov r0, sp
20927 bic r1, r0, #7
20928 mov sp, r1
20929 <save and restore r0 in normal prologue/epilogue>
20930 mov sp, r0
20931 bx lr
20933 The unwinder doesn't need to know about the stack realignment.
20934 Just tell it we saved SP in r0. */
20935 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20937 r0 = gen_rtx_REG (SImode, 0);
20938 r1 = gen_rtx_REG (SImode, 1);
20940 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20941 RTX_FRAME_RELATED_P (insn) = 1;
20942 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20944 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20946 /* ??? The CFA changes here, which may cause GDB to conclude that it
20947 has entered a different function. That said, the unwind info is
20948 correct, individually, before and after this instruction because
20949 we've described the save of SP, which will override the default
20950 handling of SP as restoring from the CFA. */
20951 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20954 /* For APCS frames, if the IP register is clobbered
20955 when creating the frame, save that register in a special
20956 way. */
20957 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20959 if (IS_INTERRUPT (func_type))
20961 /* Interrupt functions must not corrupt any registers.
20962 Creating a frame pointer however, corrupts the IP
20963 register, so we must push it first. */
20964 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20966 /* Do not set RTX_FRAME_RELATED_P on this insn.
20967 The dwarf stack unwinding code only wants to see one
20968 stack decrement per function, and this is not it. If
20969 this instruction is labeled as being part of the frame
20970 creation sequence then dwarf2out_frame_debug_expr will
20971 die when it encounters the assignment of IP to FP
20972 later on, since the use of SP here establishes SP as
20973 the CFA register and not IP.
20975 Anyway this instruction is not really part of the stack
20976 frame creation although it is part of the prologue. */
20978 else if (IS_NESTED (func_type))
20980 /* The static chain register is the same as the IP register
20981 used as a scratch register during stack frame creation.
20982 To get around this need to find somewhere to store IP
20983 whilst the frame is being created. We try the following
20984 places in order:
20986 1. The last argument register r3 if it is available.
20987 2. A slot on the stack above the frame if there are no
20988 arguments to push onto the stack.
20989 3. Register r3 again, after pushing the argument registers
20990 onto the stack, if this is a varargs function.
20991 4. The last slot on the stack created for the arguments to
20992 push, if this isn't a varargs function.
20994 Note - we only need to tell the dwarf2 backend about the SP
20995 adjustment in the second variant; the static chain register
20996 doesn't need to be unwound, as it doesn't contain a value
20997 inherited from the caller. */
20999 if (!arm_r3_live_at_start_p ())
21000 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21001 else if (args_to_push == 0)
21003 rtx addr, dwarf;
21005 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21006 saved_regs += 4;
21008 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21009 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21010 fp_offset = 4;
21012 /* Just tell the dwarf backend that we adjusted SP. */
21013 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21014 plus_constant (Pmode, stack_pointer_rtx,
21015 -fp_offset));
21016 RTX_FRAME_RELATED_P (insn) = 1;
21017 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21019 else
21021 /* Store the args on the stack. */
21022 if (cfun->machine->uses_anonymous_args)
21024 insn
21025 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21026 (0xf0 >> (args_to_push / 4)) & 0xf);
21027 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21028 saved_pretend_args = 1;
21030 else
21032 rtx addr, dwarf;
21034 if (args_to_push == 4)
21035 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21036 else
21037 addr
21038 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21039 plus_constant (Pmode,
21040 stack_pointer_rtx,
21041 -args_to_push));
21043 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21045 /* Just tell the dwarf backend that we adjusted SP. */
21046 dwarf
21047 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21048 plus_constant (Pmode, stack_pointer_rtx,
21049 -args_to_push));
21050 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21053 RTX_FRAME_RELATED_P (insn) = 1;
21054 fp_offset = args_to_push;
21055 args_to_push = 0;
21059 insn = emit_set_insn (ip_rtx,
21060 plus_constant (Pmode, stack_pointer_rtx,
21061 fp_offset));
21062 RTX_FRAME_RELATED_P (insn) = 1;
21065 if (args_to_push)
21067 /* Push the argument registers, or reserve space for them. */
21068 if (cfun->machine->uses_anonymous_args)
21069 insn = emit_multi_reg_push
21070 ((0xf0 >> (args_to_push / 4)) & 0xf,
21071 (0xf0 >> (args_to_push / 4)) & 0xf);
21072 else
21073 insn = emit_insn
21074 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21075 GEN_INT (- args_to_push)));
21076 RTX_FRAME_RELATED_P (insn) = 1;
21079 /* If this is an interrupt service routine, and the link register
21080 is going to be pushed, and we're not generating an extra
21081 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21082 subtracting four from LR now will mean that the function return
21083 can be done with a single instruction. */
21084 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21085 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21086 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21087 && TARGET_ARM)
21089 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21091 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21094 if (live_regs_mask)
21096 unsigned long dwarf_regs_mask = live_regs_mask;
21098 saved_regs += bit_count (live_regs_mask) * 4;
21099 if (optimize_size && !frame_pointer_needed
21100 && saved_regs == offsets->saved_regs - offsets->saved_args)
21102 /* If no coprocessor registers are being pushed and we don't have
21103 to worry about a frame pointer then push extra registers to
21104 create the stack frame. This is done in a way that does not
21105 alter the frame layout, so is independent of the epilogue. */
21106 int n;
21107 int frame;
21108 n = 0;
21109 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21110 n++;
21111 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21112 if (frame && n * 4 >= frame)
21114 n = frame / 4;
21115 live_regs_mask |= (1 << n) - 1;
21116 saved_regs += frame;
21120 if (TARGET_LDRD
21121 && current_tune->prefer_ldrd_strd
21122 && !optimize_function_for_size_p (cfun))
21124 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21125 if (TARGET_THUMB2)
21126 thumb2_emit_strd_push (live_regs_mask);
21127 else if (TARGET_ARM
21128 && !TARGET_APCS_FRAME
21129 && !IS_INTERRUPT (func_type))
21130 arm_emit_strd_push (live_regs_mask);
21131 else
21133 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21134 RTX_FRAME_RELATED_P (insn) = 1;
21137 else
21139 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21140 RTX_FRAME_RELATED_P (insn) = 1;
21144 if (! IS_VOLATILE (func_type))
21145 saved_regs += arm_save_coproc_regs ();
21147 if (frame_pointer_needed && TARGET_ARM)
21149 /* Create the new frame pointer. */
21150 if (TARGET_APCS_FRAME)
21152 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21153 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21154 RTX_FRAME_RELATED_P (insn) = 1;
21156 if (IS_NESTED (func_type))
21158 /* Recover the static chain register. */
21159 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21160 insn = gen_rtx_REG (SImode, 3);
21161 else
21163 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21164 insn = gen_frame_mem (SImode, insn);
21166 emit_set_insn (ip_rtx, insn);
21167 /* Add a USE to stop propagate_one_insn() from barfing. */
21168 emit_insn (gen_force_register_use (ip_rtx));
21171 else
21173 insn = GEN_INT (saved_regs - 4);
21174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21175 stack_pointer_rtx, insn));
21176 RTX_FRAME_RELATED_P (insn) = 1;
21180 if (flag_stack_usage_info)
21181 current_function_static_stack_size
21182 = offsets->outgoing_args - offsets->saved_args;
21184 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21186 /* This add can produce multiple insns for a large constant, so we
21187 need to get tricky. */
21188 rtx last = get_last_insn ();
21190 amount = GEN_INT (offsets->saved_args + saved_regs
21191 - offsets->outgoing_args);
21193 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21194 amount));
21197 last = last ? NEXT_INSN (last) : get_insns ();
21198 RTX_FRAME_RELATED_P (last) = 1;
21200 while (last != insn);
21202 /* If the frame pointer is needed, emit a special barrier that
21203 will prevent the scheduler from moving stores to the frame
21204 before the stack adjustment. */
21205 if (frame_pointer_needed)
21206 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21207 hard_frame_pointer_rtx));
21211 if (frame_pointer_needed && TARGET_THUMB2)
21212 thumb_set_frame_pointer (offsets);
21214 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21216 unsigned long mask;
21218 mask = live_regs_mask;
21219 mask &= THUMB2_WORK_REGS;
21220 if (!IS_NESTED (func_type))
21221 mask |= (1 << IP_REGNUM);
21222 arm_load_pic_register (mask);
21225 /* If we are profiling, make sure no instructions are scheduled before
21226 the call to mcount. Similarly if the user has requested no
21227 scheduling in the prolog. Similarly if we want non-call exceptions
21228 using the EABI unwinder, to prevent faulting instructions from being
21229 swapped with a stack adjustment. */
21230 if (crtl->profile || !TARGET_SCHED_PROLOG
21231 || (arm_except_unwind_info (&global_options) == UI_TARGET
21232 && cfun->can_throw_non_call_exceptions))
21233 emit_insn (gen_blockage ());
21235 /* If the link register is being kept alive, with the return address in it,
21236 then make sure that it does not get reused by the ce2 pass. */
21237 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21238 cfun->machine->lr_save_eliminated = 1;
21241 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21242 static void
21243 arm_print_condition (FILE *stream)
21245 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21247 /* Branch conversion is not implemented for Thumb-2. */
21248 if (TARGET_THUMB)
21250 output_operand_lossage ("predicated Thumb instruction");
21251 return;
21253 if (current_insn_predicate != NULL)
21255 output_operand_lossage
21256 ("predicated instruction in conditional sequence");
21257 return;
21260 fputs (arm_condition_codes[arm_current_cc], stream);
21262 else if (current_insn_predicate)
21264 enum arm_cond_code code;
21266 if (TARGET_THUMB1)
21268 output_operand_lossage ("predicated Thumb instruction");
21269 return;
21272 code = get_arm_condition_code (current_insn_predicate);
21273 fputs (arm_condition_codes[code], stream);
21278 /* If CODE is 'd', then X is a condition operand and the instruction
21279 should only be executed if the condition is true.
21280 If CODE is 'D', then X is a condition operand and the instruction
21281 should only be executed if the condition is false: however, if the mode
21282 of the comparison is CCFPEmode, then always execute the instruction -- we
21283 do this because in these circumstances !GE does not necessarily imply LT;
21284 in these cases the instruction pattern will take care to make sure that
21285 an instruction containing %d will follow, thereby undoing the effects of
21286 doing this instruction unconditionally.
21287 If CODE is 'N' then X is a floating point operand that must be negated
21288 before output.
21289 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21290 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
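/* A couple of concrete (illustrative) cases: with a CONST_INT of 0, %B
   prints -1 (the sign-extended bitwise inverse); with a DImode value held
   in r4, %M prints "{r4-r5}".  */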
21291 static void
21292 arm_print_operand (FILE *stream, rtx x, int code)
21294 switch (code)
21296 case '@':
21297 fputs (ASM_COMMENT_START, stream);
21298 return;
21300 case '_':
21301 fputs (user_label_prefix, stream);
21302 return;
21304 case '|':
21305 fputs (REGISTER_PREFIX, stream);
21306 return;
21308 case '?':
21309 arm_print_condition (stream);
21310 return;
21312 case '(':
21313 /* Nothing in unified syntax, otherwise the current condition code. */
21314 if (!TARGET_UNIFIED_ASM)
21315 arm_print_condition (stream);
21316 break;
21318 case ')':
21319 /* The current condition code in unified syntax, otherwise nothing. */
21320 if (TARGET_UNIFIED_ASM)
21321 arm_print_condition (stream);
21322 break;
21324 case '.':
21325 /* The current condition code for a condition code setting instruction.
21326 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21327 if (TARGET_UNIFIED_ASM)
21329 fputc('s', stream);
21330 arm_print_condition (stream);
21332 else
21334 arm_print_condition (stream);
21335 fputc('s', stream);
21337 return;
21339 case '!':
21340 /* If the instruction is conditionally executed then print
21341 the current condition code, otherwise print 's'. */
21342 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21343 if (current_insn_predicate)
21344 arm_print_condition (stream);
21345 else
21346 fputc('s', stream);
21347 break;
21349 /* %# is a "break" sequence. It doesn't output anything, but is used to
21350 separate e.g. operand numbers from following text, if that text consists
21351 of further digits which we don't want to be part of the operand
21352 number. */
21353 case '#':
21354 return;
21356 case 'N':
21358 REAL_VALUE_TYPE r;
21359 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21360 r = real_value_negate (&r);
21361 fprintf (stream, "%s", fp_const_from_val (&r));
21363 return;
21365 /* An integer or symbol address without a preceding # sign. */
21366 case 'c':
21367 switch (GET_CODE (x))
21369 case CONST_INT:
21370 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21371 break;
21373 case SYMBOL_REF:
21374 output_addr_const (stream, x);
21375 break;
21377 case CONST:
21378 if (GET_CODE (XEXP (x, 0)) == PLUS
21379 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21381 output_addr_const (stream, x);
21382 break;
21384 /* Fall through. */
21386 default:
21387 output_operand_lossage ("Unsupported operand for code '%c'", code);
21389 return;
21391 /* An integer that we want to print in HEX. */
21392 case 'x':
21393 switch (GET_CODE (x))
21395 case CONST_INT:
21396 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21397 break;
21399 default:
21400 output_operand_lossage ("Unsupported operand for code '%c'", code);
21402 return;
21404 case 'B':
21405 if (CONST_INT_P (x))
21407 HOST_WIDE_INT val;
21408 val = ARM_SIGN_EXTEND (~INTVAL (x));
21409 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21411 else
21413 putc ('~', stream);
21414 output_addr_const (stream, x);
21416 return;
21418 case 'L':
21419 /* The low 16 bits of an immediate constant. */
21420 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21421 return;
21423 case 'i':
21424 fprintf (stream, "%s", arithmetic_instr (x, 1));
21425 return;
21427 case 'I':
21428 fprintf (stream, "%s", arithmetic_instr (x, 0));
21429 return;
21431 case 'S':
21433 HOST_WIDE_INT val;
21434 const char *shift;
21436 shift = shift_op (x, &val);
21438 if (shift)
21440 fprintf (stream, ", %s ", shift);
21441 if (val == -1)
21442 arm_print_operand (stream, XEXP (x, 1), 0);
21443 else
21444 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21447 return;
21449 /* An explanation of the 'Q', 'R' and 'H' register operands:
21451 In a pair of registers containing a DI or DF value the 'Q'
21452 operand returns the register number of the register containing
21453 the least significant part of the value. The 'R' operand returns
21454 the register number of the register containing the most
21455 significant part of the value.
21457 The 'H' operand returns the higher of the two register numbers.
21458 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21459 same as the 'Q' operand, since the most significant part of the
21460 value is held in the lower number register. The reverse is true
21461 on systems where WORDS_BIG_ENDIAN is false.
21463 The purpose of these operands is to distinguish between cases
21464 where the endian-ness of the values is important (for example
21465 when they are added together), and cases where the endian-ness
21466 is irrelevant, but the order of register operations is important.
21467 For example when loading a value from memory into a register
21468 pair, the endian-ness does not matter. Provided that the value
21469 from the lower memory address is put into the lower numbered
21470 register, and the value from the higher address is put into the
21471 higher numbered register, the load will work regardless of whether
21472 the value being loaded is big-wordian or little-wordian. The
21473 order of the two register loads can matter however, if the address
21474 of the memory location is actually held in one of the registers
21475 being overwritten by the load.
21477 The 'Q' and 'R' constraints are also available for 64-bit
21478 constants. */
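/* Worked example (a sketch, not part of the original comment): with a
   DImode value held in the register pair r2/r3 on a target where
   WORDS_BIG_ENDIAN is false, the low word lives in r2, so '%Q' prints
   r2, '%R' prints r3 and '%H' prints r3.  Were WORDS_BIG_ENDIAN true,
   '%Q' would print r3 and '%R' r2, while '%H' would still print r3.  */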
21479 case 'Q':
21480 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21482 rtx part = gen_lowpart (SImode, x);
21483 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21484 return;
21487 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21489 output_operand_lossage ("invalid operand for code '%c'", code);
21490 return;
21493 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21494 return;
21496 case 'R':
21497 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21499 enum machine_mode mode = GET_MODE (x);
21500 rtx part;
21502 if (mode == VOIDmode)
21503 mode = DImode;
21504 part = gen_highpart_mode (SImode, mode, x);
21505 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21506 return;
21509 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21511 output_operand_lossage ("invalid operand for code '%c'", code);
21512 return;
21515 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21516 return;
21518 case 'H':
21519 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21521 output_operand_lossage ("invalid operand for code '%c'", code);
21522 return;
21525 asm_fprintf (stream, "%r", REGNO (x) + 1);
21526 return;
21528 case 'J':
21529 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21531 output_operand_lossage ("invalid operand for code '%c'", code);
21532 return;
21535 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21536 return;
21538 case 'K':
21539 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21541 output_operand_lossage ("invalid operand for code '%c'", code);
21542 return;
21545 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21546 return;
21548 case 'm':
21549 asm_fprintf (stream, "%r",
21550 REG_P (XEXP (x, 0))
21551 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21552 return;
21554 case 'M':
21555 asm_fprintf (stream, "{%r-%r}",
21556 REGNO (x),
21557 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21558 return;
21560 /* Like 'M', but writing doubleword vector registers, for use by Neon
21561 insns. */
21562 case 'h':
21564 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21565 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21566 if (numregs == 1)
21567 asm_fprintf (stream, "{d%d}", regno);
21568 else
21569 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21571 return;
21573 case 'd':
21574 /* CONST_TRUE_RTX means always -- that's the default. */
21575 if (x == const_true_rtx)
21576 return;
21578 if (!COMPARISON_P (x))
21580 output_operand_lossage ("invalid operand for code '%c'", code);
21581 return;
21584 fputs (arm_condition_codes[get_arm_condition_code (x)],
21585 stream);
21586 return;
21588 case 'D':
21589 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21590 want to do that. */
21591 if (x == const_true_rtx)
21593 output_operand_lossage ("instruction never executed");
21594 return;
21596 if (!COMPARISON_P (x))
21598 output_operand_lossage ("invalid operand for code '%c'", code);
21599 return;
21602 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21603 (get_arm_condition_code (x))],
21604 stream);
21605 return;
21607 case 's':
21608 case 'V':
21609 case 'W':
21610 case 'X':
21611 case 'Y':
21612 case 'Z':
21613 /* Former Maverick support, removed after GCC-4.7. */
21614 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21615 return;
21617 case 'U':
21618 if (!REG_P (x)
21619 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21620 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21621 /* Bad value for wCG register number. */
21623 output_operand_lossage ("invalid operand for code '%c'", code);
21624 return;
21627 else
21628 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21629 return;
21631 /* Print an iWMMXt control register name. */
21632 case 'w':
21633 if (!CONST_INT_P (x)
21634 || INTVAL (x) < 0
21635 || INTVAL (x) >= 16)
21636 /* Bad value for wC register number. */
21638 output_operand_lossage ("invalid operand for code '%c'", code);
21639 return;
21642 else
21644 static const char * wc_reg_names [16] =
21646 "wCID", "wCon", "wCSSF", "wCASF",
21647 "wC4", "wC5", "wC6", "wC7",
21648 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21649 "wC12", "wC13", "wC14", "wC15"
21652 fputs (wc_reg_names [INTVAL (x)], stream);
21654 return;
21656 /* Print the high single-precision register of a VFP double-precision
21657 register. */
21658 case 'p':
21660 enum machine_mode mode = GET_MODE (x);
21661 int regno;
21663 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21665 output_operand_lossage ("invalid operand for code '%c'", code);
21666 return;
21669 regno = REGNO (x);
21670 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21672 output_operand_lossage ("invalid operand for code '%c'", code);
21673 return;
21676 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21678 return;
21680 /* Print a VFP/Neon double precision or quad precision register name. */
21681 case 'P':
21682 case 'q':
21684 enum machine_mode mode = GET_MODE (x);
21685 int is_quad = (code == 'q');
21686 int regno;
21688 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21690 output_operand_lossage ("invalid operand for code '%c'", code);
21691 return;
21694 if (!REG_P (x)
21695 || !IS_VFP_REGNUM (REGNO (x)))
21697 output_operand_lossage ("invalid operand for code '%c'", code);
21698 return;
21701 regno = REGNO (x);
21702 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21703 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21705 output_operand_lossage ("invalid operand for code '%c'", code);
21706 return;
21709 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21710 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21712 return;
21714 /* These two codes print the low/high doubleword register of a Neon quad
21715 register, respectively. For pair-structure types, can also print
21716 low/high quadword registers. */
21717 case 'e':
21718 case 'f':
21720 enum machine_mode mode = GET_MODE (x);
21721 int regno;
21723 if ((GET_MODE_SIZE (mode) != 16
21724 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21726 output_operand_lossage ("invalid operand for code '%c'", code);
21727 return;
21730 regno = REGNO (x);
21731 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21733 output_operand_lossage ("invalid operand for code '%c'", code);
21734 return;
21737 if (GET_MODE_SIZE (mode) == 16)
21738 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21739 + (code == 'f' ? 1 : 0));
21740 else
21741 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21742 + (code == 'f' ? 1 : 0));
21744 return;
21746 /* Print a VFPv3 floating-point constant, represented as an integer
21747 index. */
21748 case 'G':
21750 int index = vfp3_const_double_index (x);
21751 gcc_assert (index != -1);
21752 fprintf (stream, "%d", index);
21754 return;
21756 /* Print bits representing opcode features for Neon.
21758 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21759 and polynomials as unsigned.
21761 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21763 Bit 2 is 1 for rounding functions, 0 otherwise. */
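/* Illustrative decoding of these bits (an informal sketch based on the
   lookup strings used below): bits & 3 == 0 selects 'u' (unsigned int),
   1 selects 's' (signed int), 2 selects 'p' (polynomial) and 3 selects
   'f' (float) for the 'T' code; the 'F' code maps both integer kinds to
   'i', and the 't' code maps polynomial to 'u'.  A value such as 5
   (signed, with rounding) prints 's' for '%T' and 'r' for '%O'.  */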
21765 /* Identify the type as 's', 'u', 'p' or 'f'. */
21766 case 'T':
21768 HOST_WIDE_INT bits = INTVAL (x);
21769 fputc ("uspf"[bits & 3], stream);
21771 return;
21773 /* Likewise, but signed and unsigned integers are both 'i'. */
21774 case 'F':
21776 HOST_WIDE_INT bits = INTVAL (x);
21777 fputc ("iipf"[bits & 3], stream);
21779 return;
21781 /* As for 'T', but emit 'u' instead of 'p'. */
21782 case 't':
21784 HOST_WIDE_INT bits = INTVAL (x);
21785 fputc ("usuf"[bits & 3], stream);
21787 return;
21789 /* Bit 2: rounding (vs none). */
21790 case 'O':
21792 HOST_WIDE_INT bits = INTVAL (x);
21793 fputs ((bits & 4) != 0 ? "r" : "", stream);
21795 return;
21797 /* Memory operand for vld1/vst1 instruction. */
21798 case 'A':
21800 rtx addr;
21801 bool postinc = FALSE;
21802 unsigned align, memsize, align_bits;
21804 gcc_assert (MEM_P (x));
21805 addr = XEXP (x, 0);
21806 if (GET_CODE (addr) == POST_INC)
21808 postinc = 1;
21809 addr = XEXP (addr, 0);
21811 asm_fprintf (stream, "[%r", REGNO (addr));
21813 /* We know the alignment of this access, so we can emit a hint in the
21814 instruction (for some alignments) as an aid to the memory subsystem
21815 of the target. */
21816 align = MEM_ALIGN (x) >> 3;
21817 memsize = MEM_SIZE (x);
21819 /* Only certain alignment specifiers are supported by the hardware. */
21820 if (memsize == 32 && (align % 32) == 0)
21821 align_bits = 256;
21822 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21823 align_bits = 128;
21824 else if (memsize >= 8 && (align % 8) == 0)
21825 align_bits = 64;
21826 else
21827 align_bits = 0;
21829 if (align_bits != 0)
21830 asm_fprintf (stream, ":%d", align_bits);
21832 asm_fprintf (stream, "]");
21834 if (postinc)
21835 fputs("!", stream);
21837 return;
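/* Example of the output produced above (a sketch, assuming a base
   register r0): a 16-byte access known to be 128-bit aligned with a
   POST_INC address prints "[r0:128]!", while an access whose alignment
   matches none of the supported hints prints just "[r0]".  */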
21839 case 'C':
21841 rtx addr;
21843 gcc_assert (MEM_P (x));
21844 addr = XEXP (x, 0);
21845 gcc_assert (REG_P (addr));
21846 asm_fprintf (stream, "[%r]", REGNO (addr));
21848 return;
21850 /* Translate an S register number into a D register number and element index. */
21851 case 'y':
21853 enum machine_mode mode = GET_MODE (x);
21854 int regno;
21856 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21862 regno = REGNO (x);
21863 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21869 regno = regno - FIRST_VFP_REGNUM;
21870 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21872 return;
21874 case 'v':
21875 gcc_assert (CONST_DOUBLE_P (x));
21876 int result;
21877 result = vfp3_const_double_for_fract_bits (x);
21878 if (result == 0)
21879 result = vfp3_const_double_for_bits (x);
21880 fprintf (stream, "#%d", result);
21881 return;
21883 /* Register specifier for vld1.16/vst1.16. Translate the S register
21884 number into a D register number and element index. */
21885 case 'z':
21887 enum machine_mode mode = GET_MODE (x);
21888 int regno;
21890 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21892 output_operand_lossage ("invalid operand for code '%c'", code);
21893 return;
21896 regno = REGNO (x);
21897 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21899 output_operand_lossage ("invalid operand for code '%c'", code);
21900 return;
21903 regno = regno - FIRST_VFP_REGNUM;
21904 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21906 return;
21908 default:
21909 if (x == 0)
21911 output_operand_lossage ("missing operand");
21912 return;
21915 switch (GET_CODE (x))
21917 case REG:
21918 asm_fprintf (stream, "%r", REGNO (x));
21919 break;
21921 case MEM:
21922 output_memory_reference_mode = GET_MODE (x);
21923 output_address (XEXP (x, 0));
21924 break;
21926 case CONST_DOUBLE:
21927 if (TARGET_NEON)
21929 char fpstr[20];
21930 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21931 sizeof (fpstr), 0, 1);
21932 fprintf (stream, "#%s", fpstr);
21934 else
21935 fprintf (stream, "#%s", fp_immediate_constant (x));
21936 break;
21938 default:
21939 gcc_assert (GET_CODE (x) != NEG);
21940 fputc ('#', stream);
21941 if (GET_CODE (x) == HIGH)
21943 fputs (":lower16:", stream);
21944 x = XEXP (x, 0);
21947 output_addr_const (stream, x);
21948 break;
21953 /* Target hook for printing a memory address. */
21954 static void
21955 arm_print_operand_address (FILE *stream, rtx x)
21957 if (TARGET_32BIT)
21959 int is_minus = GET_CODE (x) == MINUS;
21961 if (REG_P (x))
21962 asm_fprintf (stream, "[%r]", REGNO (x));
21963 else if (GET_CODE (x) == PLUS || is_minus)
21965 rtx base = XEXP (x, 0);
21966 rtx index = XEXP (x, 1);
21967 HOST_WIDE_INT offset = 0;
21968 if (!REG_P (base)
21969 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21971 /* Ensure that BASE is a register. */
21972 /* (one of them must be). */
21973 /* Also ensure the SP is not used as an index register. */

21974 rtx temp = base;
21975 base = index;
21976 index = temp;
21978 switch (GET_CODE (index))
21980 case CONST_INT:
21981 offset = INTVAL (index);
21982 if (is_minus)
21983 offset = -offset;
21984 asm_fprintf (stream, "[%r, #%wd]",
21985 REGNO (base), offset);
21986 break;
21988 case REG:
21989 asm_fprintf (stream, "[%r, %s%r]",
21990 REGNO (base), is_minus ? "-" : "",
21991 REGNO (index));
21992 break;
21994 case MULT:
21995 case ASHIFTRT:
21996 case LSHIFTRT:
21997 case ASHIFT:
21998 case ROTATERT:
22000 asm_fprintf (stream, "[%r, %s%r",
22001 REGNO (base), is_minus ? "-" : "",
22002 REGNO (XEXP (index, 0)));
22003 arm_print_operand (stream, index, 'S');
22004 fputs ("]", stream);
22005 break;
22008 default:
22009 gcc_unreachable ();
22012 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22013 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22015 extern enum machine_mode output_memory_reference_mode;
22017 gcc_assert (REG_P (XEXP (x, 0)));
22019 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22020 asm_fprintf (stream, "[%r, #%s%d]!",
22021 REGNO (XEXP (x, 0)),
22022 GET_CODE (x) == PRE_DEC ? "-" : "",
22023 GET_MODE_SIZE (output_memory_reference_mode));
22024 else
22025 asm_fprintf (stream, "[%r], #%s%d",
22026 REGNO (XEXP (x, 0)),
22027 GET_CODE (x) == POST_DEC ? "-" : "",
22028 GET_MODE_SIZE (output_memory_reference_mode));
22030 else if (GET_CODE (x) == PRE_MODIFY)
22032 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22033 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22034 asm_fprintf (stream, "#%wd]!",
22035 INTVAL (XEXP (XEXP (x, 1), 1)));
22036 else
22037 asm_fprintf (stream, "%r]!",
22038 REGNO (XEXP (XEXP (x, 1), 1)));
22040 else if (GET_CODE (x) == POST_MODIFY)
22042 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22043 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22044 asm_fprintf (stream, "#%wd",
22045 INTVAL (XEXP (XEXP (x, 1), 1)));
22046 else
22047 asm_fprintf (stream, "%r",
22048 REGNO (XEXP (XEXP (x, 1), 1)));
22050 else output_addr_const (stream, x);
22052 else
22054 if (REG_P (x))
22055 asm_fprintf (stream, "[%r]", REGNO (x));
22056 else if (GET_CODE (x) == POST_INC)
22057 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22058 else if (GET_CODE (x) == PLUS)
22060 gcc_assert (REG_P (XEXP (x, 0)));
22061 if (CONST_INT_P (XEXP (x, 1)))
22062 asm_fprintf (stream, "[%r, #%wd]",
22063 REGNO (XEXP (x, 0)),
22064 INTVAL (XEXP (x, 1)));
22065 else
22066 asm_fprintf (stream, "[%r, %r]",
22067 REGNO (XEXP (x, 0)),
22068 REGNO (XEXP (x, 1)));
22070 else
22071 output_addr_const (stream, x);
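/* Some example renderings from the 32-bit path above (an informal
   sketch; register names chosen arbitrarily, auto-modify forms shown
   for an SImode reference):

     (reg r4)                        -> [r4]
     (plus (reg r4) (const_int 8))   -> [r4, #8]
     (minus (reg r4) (reg r5))       -> [r4, -r5]
     (pre_dec (reg r4))              -> [r4, #-4]!
     (post_inc (reg r4))             -> [r4], #4  */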
22075 /* Target hook for indicating whether a punctuation character for
22076 TARGET_PRINT_OPERAND is valid. */
22077 static bool
22078 arm_print_operand_punct_valid_p (unsigned char code)
22080 return (code == '@' || code == '|' || code == '.'
22081 || code == '(' || code == ')' || code == '#'
22082 || (TARGET_32BIT && (code == '?'))
22083 || (TARGET_THUMB2 && (code == '!'))
22084 || (TARGET_THUMB && (code == '_')));
22087 /* Target hook for assembling integer objects. The ARM version needs to
22088 handle word-sized values specially. */
22089 static bool
22090 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22092 enum machine_mode mode;
22094 if (size == UNITS_PER_WORD && aligned_p)
22096 fputs ("\t.word\t", asm_out_file);
22097 output_addr_const (asm_out_file, x);
22099 /* Mark symbols as position independent. We only do this in the
22100 .text segment, not in the .data segment. */
22101 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22102 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22104 /* See legitimize_pic_address for an explanation of the
22105 TARGET_VXWORKS_RTP check. */
22106 if (!arm_pic_data_is_text_relative
22107 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22108 fputs ("(GOT)", asm_out_file);
22109 else
22110 fputs ("(GOTOFF)", asm_out_file);
22112 fputc ('\n', asm_out_file);
22113 return true;
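/* For instance (hypothetical symbol, not taken from the source):
   assembling the address of a global "foo" for a PIC constant table
   would emit "\t.word\tfoo(GOTOFF)" when the data is text-relative and
   foo binds locally, and "\t.word\tfoo(GOT)" otherwise.  */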
22116 mode = GET_MODE (x);
22118 if (arm_vector_mode_supported_p (mode))
22120 int i, units;
22122 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22124 units = CONST_VECTOR_NUNITS (x);
22125 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22127 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22128 for (i = 0; i < units; i++)
22130 rtx elt = CONST_VECTOR_ELT (x, i);
22131 assemble_integer
22132 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22134 else
22135 for (i = 0; i < units; i++)
22137 rtx elt = CONST_VECTOR_ELT (x, i);
22138 REAL_VALUE_TYPE rval;
22140 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22142 assemble_real
22143 (rval, GET_MODE_INNER (mode),
22144 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22147 return true;
22150 return default_assemble_integer (x, size, aligned_p);
22153 static void
22154 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22156 section *s;
22158 if (!TARGET_AAPCS_BASED)
22160 (is_ctor ?
22161 default_named_section_asm_out_constructor
22162 : default_named_section_asm_out_destructor) (symbol, priority);
22163 return;
22166 /* Put these in the .init_array section, using a special relocation. */
22167 if (priority != DEFAULT_INIT_PRIORITY)
22169 char buf[18];
22170 sprintf (buf, "%s.%.5u",
22171 is_ctor ? ".init_array" : ".fini_array",
22172 priority);
22173 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22175 else if (is_ctor)
22176 s = ctors_section;
22177 else
22178 s = dtors_section;
22180 switch_to_section (s);
22181 assemble_align (POINTER_SIZE);
22182 fputs ("\t.word\t", asm_out_file);
22183 output_addr_const (asm_out_file, symbol);
22184 fputs ("(target1)\n", asm_out_file);
22187 /* Add a function to the list of static constructors. */
22189 static void
22190 arm_elf_asm_constructor (rtx symbol, int priority)
22192 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22195 /* Add a function to the list of static destructors. */
22197 static void
22198 arm_elf_asm_destructor (rtx symbol, int priority)
22200 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22203 /* A finite state machine takes care of noticing whether or not instructions
22204 can be conditionally executed, and thus decrease execution time and code
22205 size by deleting branch instructions. The fsm is controlled by
22206 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22208 /* The states of the fsm controlling condition codes are:
22209 0: normal, do nothing special
22210 1: make ASM_OUTPUT_OPCODE not output this instruction
22211 2: make ASM_OUTPUT_OPCODE not output this instruction
22212 3: make instructions conditional
22213 4: make instructions conditional
22215 State transitions (state->state by whom under condition):
22216 0 -> 1 final_prescan_insn if the `target' is a label
22217 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22218 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22219 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22220 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22221 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22222 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22223 (the target insn is arm_target_insn).
22225 If the jump clobbers the conditions then we use states 2 and 4.
22227 A similar thing can be done with conditional return insns.
22229 XXX In case the `target' is an unconditional branch, this conditionalising
22230 of the instructions always reduces code size, but not always execution
22231 time. But then, I want to reduce the code size to somewhere near what
22232 /bin/cc produces. */
22234 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22235 instructions. When a COND_EXEC instruction is seen the subsequent
22236 instructions are scanned so that multiple conditional instructions can be
22237 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22238 specify the length and true/false mask for the IT block. These will be
22239 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
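/* For illustration (a sketch, not taken from any particular test case),
   a conditional branch around a single instruction such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery (in ARM state) as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch entirely.  */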
22241 /* Returns the index of the ARM condition code string in
22242 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22243 COMPARISON should be an rtx like `(eq (...) (...))'. */
22245 enum arm_cond_code
22246 maybe_get_arm_condition_code (rtx comparison)
22248 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22249 enum arm_cond_code code;
22250 enum rtx_code comp_code = GET_CODE (comparison);
22252 if (GET_MODE_CLASS (mode) != MODE_CC)
22253 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22254 XEXP (comparison, 1));
22256 switch (mode)
22258 case CC_DNEmode: code = ARM_NE; goto dominance;
22259 case CC_DEQmode: code = ARM_EQ; goto dominance;
22260 case CC_DGEmode: code = ARM_GE; goto dominance;
22261 case CC_DGTmode: code = ARM_GT; goto dominance;
22262 case CC_DLEmode: code = ARM_LE; goto dominance;
22263 case CC_DLTmode: code = ARM_LT; goto dominance;
22264 case CC_DGEUmode: code = ARM_CS; goto dominance;
22265 case CC_DGTUmode: code = ARM_HI; goto dominance;
22266 case CC_DLEUmode: code = ARM_LS; goto dominance;
22267 case CC_DLTUmode: code = ARM_CC;
22269 dominance:
22270 if (comp_code == EQ)
22271 return ARM_INVERSE_CONDITION_CODE (code);
22272 if (comp_code == NE)
22273 return code;
22274 return ARM_NV;
22276 case CC_NOOVmode:
22277 switch (comp_code)
22279 case NE: return ARM_NE;
22280 case EQ: return ARM_EQ;
22281 case GE: return ARM_PL;
22282 case LT: return ARM_MI;
22283 default: return ARM_NV;
22286 case CC_Zmode:
22287 switch (comp_code)
22289 case NE: return ARM_NE;
22290 case EQ: return ARM_EQ;
22291 default: return ARM_NV;
22294 case CC_Nmode:
22295 switch (comp_code)
22297 case NE: return ARM_MI;
22298 case EQ: return ARM_PL;
22299 default: return ARM_NV;
22302 case CCFPEmode:
22303 case CCFPmode:
22304 /* We can handle all cases except UNEQ and LTGT. */
22305 switch (comp_code)
22307 case GE: return ARM_GE;
22308 case GT: return ARM_GT;
22309 case LE: return ARM_LS;
22310 case LT: return ARM_MI;
22311 case NE: return ARM_NE;
22312 case EQ: return ARM_EQ;
22313 case ORDERED: return ARM_VC;
22314 case UNORDERED: return ARM_VS;
22315 case UNLT: return ARM_LT;
22316 case UNLE: return ARM_LE;
22317 case UNGT: return ARM_HI;
22318 case UNGE: return ARM_PL;
22319 /* UNEQ and LTGT do not have a representation. */
22320 case UNEQ: /* Fall through. */
22321 case LTGT: /* Fall through. */
22322 default: return ARM_NV;
22325 case CC_SWPmode:
22326 switch (comp_code)
22328 case NE: return ARM_NE;
22329 case EQ: return ARM_EQ;
22330 case GE: return ARM_LE;
22331 case GT: return ARM_LT;
22332 case LE: return ARM_GE;
22333 case LT: return ARM_GT;
22334 case GEU: return ARM_LS;
22335 case GTU: return ARM_CC;
22336 case LEU: return ARM_CS;
22337 case LTU: return ARM_HI;
22338 default: return ARM_NV;
22341 case CC_Cmode:
22342 switch (comp_code)
22344 case LTU: return ARM_CS;
22345 case GEU: return ARM_CC;
22346 default: return ARM_NV;
22349 case CC_CZmode:
22350 switch (comp_code)
22352 case NE: return ARM_NE;
22353 case EQ: return ARM_EQ;
22354 case GEU: return ARM_CS;
22355 case GTU: return ARM_HI;
22356 case LEU: return ARM_LS;
22357 case LTU: return ARM_CC;
22358 default: return ARM_NV;
22361 case CC_NCVmode:
22362 switch (comp_code)
22364 case GE: return ARM_GE;
22365 case LT: return ARM_LT;
22366 case GEU: return ARM_CS;
22367 case LTU: return ARM_CC;
22368 default: return ARM_NV;
22371 case CCmode:
22372 switch (comp_code)
22374 case NE: return ARM_NE;
22375 case EQ: return ARM_EQ;
22376 case GE: return ARM_GE;
22377 case GT: return ARM_GT;
22378 case LE: return ARM_LE;
22379 case LT: return ARM_LT;
22380 case GEU: return ARM_CS;
22381 case GTU: return ARM_HI;
22382 case LEU: return ARM_LS;
22383 case LTU: return ARM_CC;
22384 default: return ARM_NV;
22387 default: gcc_unreachable ();
22391 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22392 static enum arm_cond_code
22393 get_arm_condition_code (rtx comparison)
22395 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22396 gcc_assert (code != ARM_NV);
22397 return code;
22400 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22401 instructions. */
22402 void
22403 thumb2_final_prescan_insn (rtx insn)
22405 rtx first_insn = insn;
22406 rtx body = PATTERN (insn);
22407 rtx predicate;
22408 enum arm_cond_code code;
22409 int n;
22410 int mask;
22411 int max;
22413 /* max_insns_skipped in the tune was already taken into account in the
22414 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22415 just emit the IT blocks as best we can. It does not make sense to split
22416 the IT blocks. */
22417 max = MAX_INSN_PER_IT_BLOCK;
22419 /* Remove the previous insn from the count of insns to be output. */
22420 if (arm_condexec_count)
22421 arm_condexec_count--;
22423 /* Nothing to do if we are already inside a conditional block. */
22424 if (arm_condexec_count)
22425 return;
22427 if (GET_CODE (body) != COND_EXEC)
22428 return;
22430 /* Conditional jumps are implemented directly. */
22431 if (JUMP_P (insn))
22432 return;
22434 predicate = COND_EXEC_TEST (body);
22435 arm_current_cc = get_arm_condition_code (predicate);
22437 n = get_attr_ce_count (insn);
22438 arm_condexec_count = 1;
22439 arm_condexec_mask = (1 << n) - 1;
22440 arm_condexec_masklen = n;
22441 /* See if subsequent instructions can be combined into the same block. */
22442 for (;;)
22444 insn = next_nonnote_insn (insn);
22446 /* Jumping into the middle of an IT block is illegal, so a label or
22447 barrier terminates the block. */
22448 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22449 break;
22451 body = PATTERN (insn);
22452 /* USE and CLOBBER aren't really insns, so just skip them. */
22453 if (GET_CODE (body) == USE
22454 || GET_CODE (body) == CLOBBER)
22455 continue;
22457 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22458 if (GET_CODE (body) != COND_EXEC)
22459 break;
22460 /* Maximum number of conditionally executed instructions in a block. */
22461 n = get_attr_ce_count (insn);
22462 if (arm_condexec_masklen + n > max)
22463 break;
22465 predicate = COND_EXEC_TEST (body);
22466 code = get_arm_condition_code (predicate);
22467 mask = (1 << n) - 1;
22468 if (arm_current_cc == code)
22469 arm_condexec_mask |= (mask << arm_condexec_masklen);
22470 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22471 break;
22473 arm_condexec_count++;
22474 arm_condexec_masklen += n;
22476 /* A jump must be the last instruction in a conditional block. */
22477 if (JUMP_P (insn))
22478 break;
22480 /* Restore recog_data (getting the attributes of other insns can
22481 destroy this array, but final.c assumes that it remains intact
22482 across this call). */
22483 extract_constrain_insn_cached (first_insn);
22486 void
22487 arm_final_prescan_insn (rtx insn)
22489 /* BODY will hold the body of INSN. */
22490 rtx body = PATTERN (insn);
22492 /* This will be 1 if trying to repeat the trick, and things need to be
22493 reversed if it appears to fail. */
22494 int reverse = 0;
22496 /* If we start with a return insn, we only succeed if we find another one. */
22497 int seeking_return = 0;
22498 enum rtx_code return_code = UNKNOWN;
22500 /* START_INSN will hold the insn from where we start looking. This is the
22501 first insn after the following code_label if REVERSE is true. */
22502 rtx start_insn = insn;
22504 /* If in state 4, check if the target branch is reached, in order to
22505 change back to state 0. */
22506 if (arm_ccfsm_state == 4)
22508 if (insn == arm_target_insn)
22510 arm_target_insn = NULL;
22511 arm_ccfsm_state = 0;
22513 return;
22516 /* If in state 3, it is possible to repeat the trick, if this insn is an
22517 unconditional branch to a label, and immediately following this branch
22518 is the previous target label which is only used once, and the label this
22519 branch jumps to is not too far off. */
22520 if (arm_ccfsm_state == 3)
22522 if (simplejump_p (insn))
22524 start_insn = next_nonnote_insn (start_insn);
22525 if (BARRIER_P (start_insn))
22527 /* XXX Isn't this always a barrier? */
22528 start_insn = next_nonnote_insn (start_insn);
22530 if (LABEL_P (start_insn)
22531 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22532 && LABEL_NUSES (start_insn) == 1)
22533 reverse = TRUE;
22534 else
22535 return;
22537 else if (ANY_RETURN_P (body))
22539 start_insn = next_nonnote_insn (start_insn);
22540 if (BARRIER_P (start_insn))
22541 start_insn = next_nonnote_insn (start_insn);
22542 if (LABEL_P (start_insn)
22543 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22544 && LABEL_NUSES (start_insn) == 1)
22546 reverse = TRUE;
22547 seeking_return = 1;
22548 return_code = GET_CODE (body);
22550 else
22551 return;
22553 else
22554 return;
22557 gcc_assert (!arm_ccfsm_state || reverse);
22558 if (!JUMP_P (insn))
22559 return;
22561 /* This jump might be paralleled with a clobber of the condition codes;
22562 the jump should always come first. */
22563 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22564 body = XVECEXP (body, 0, 0);
22566 if (reverse
22567 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22568 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22570 int insns_skipped;
22571 int fail = FALSE, succeed = FALSE;
22572 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22573 int then_not_else = TRUE;
22574 rtx this_insn = start_insn, label = 0;
22576 /* Register the insn jumped to. */
22577 if (reverse)
22579 if (!seeking_return)
22580 label = XEXP (SET_SRC (body), 0);
22582 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22583 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22584 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22586 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22587 then_not_else = FALSE;
22589 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22591 seeking_return = 1;
22592 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22594 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22596 seeking_return = 1;
22597 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22598 then_not_else = FALSE;
22600 else
22601 gcc_unreachable ();
22603 /* See how many insns this branch skips, and what kind of insns. If all
22604 insns are okay, and the label or unconditional branch to the same
22605 label is not too far away, succeed. */
22606 for (insns_skipped = 0;
22607 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22609 rtx scanbody;
22611 this_insn = next_nonnote_insn (this_insn);
22612 if (!this_insn)
22613 break;
22615 switch (GET_CODE (this_insn))
22617 case CODE_LABEL:
22618 /* Succeed if it is the target label, otherwise fail since
22619 control falls in from somewhere else. */
22620 if (this_insn == label)
22622 arm_ccfsm_state = 1;
22623 succeed = TRUE;
22625 else
22626 fail = TRUE;
22627 break;
22629 case BARRIER:
22630 /* Succeed if the following insn is the target label.
22631 Otherwise fail.
22632 If return insns are used then the last insn in a function
22633 will be a barrier. */
22634 this_insn = next_nonnote_insn (this_insn);
22635 if (this_insn && this_insn == label)
22637 arm_ccfsm_state = 1;
22638 succeed = TRUE;
22640 else
22641 fail = TRUE;
22642 break;
22644 case CALL_INSN:
22645 /* The AAPCS says that conditional calls should not be
22646 used since they make interworking inefficient (the
22647 linker can't transform BL<cond> into BLX). That's
22648 only a problem if the machine has BLX. */
22649 if (arm_arch5)
22651 fail = TRUE;
22652 break;
22655 /* Succeed if the following insn is the target label, or
22656 if the following two insns are a barrier and the
22657 target label. */
22658 this_insn = next_nonnote_insn (this_insn);
22659 if (this_insn && BARRIER_P (this_insn))
22660 this_insn = next_nonnote_insn (this_insn);
22662 if (this_insn && this_insn == label
22663 && insns_skipped < max_insns_skipped)
22665 arm_ccfsm_state = 1;
22666 succeed = TRUE;
22668 else
22669 fail = TRUE;
22670 break;
22672 case JUMP_INSN:
22673 /* If this is an unconditional branch to the same label, succeed.
22674 If it is to another label, do nothing. If it is conditional,
22675 fail. */
22676 /* XXX Probably, the tests for SET and the PC are
22677 unnecessary. */
22679 scanbody = PATTERN (this_insn);
22680 if (GET_CODE (scanbody) == SET
22681 && GET_CODE (SET_DEST (scanbody)) == PC)
22683 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22684 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22686 arm_ccfsm_state = 2;
22687 succeed = TRUE;
22689 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22690 fail = TRUE;
22692 /* Fail if a conditional return is undesirable (e.g. on a
22693 StrongARM), but still allow this if optimizing for size. */
22694 else if (GET_CODE (scanbody) == return_code
22695 && !use_return_insn (TRUE, NULL)
22696 && !optimize_size)
22697 fail = TRUE;
22698 else if (GET_CODE (scanbody) == return_code)
22700 arm_ccfsm_state = 2;
22701 succeed = TRUE;
22703 else if (GET_CODE (scanbody) == PARALLEL)
22705 switch (get_attr_conds (this_insn))
22707 case CONDS_NOCOND:
22708 break;
22709 default:
22710 fail = TRUE;
22711 break;
22714 else
22715 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22717 break;
22719 case INSN:
22720 /* Instructions using or affecting the condition codes make it
22721 fail. */
22722 scanbody = PATTERN (this_insn);
22723 if (!(GET_CODE (scanbody) == SET
22724 || GET_CODE (scanbody) == PARALLEL)
22725 || get_attr_conds (this_insn) != CONDS_NOCOND)
22726 fail = TRUE;
22727 break;
22729 default:
22730 break;
22733 if (succeed)
22735 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22736 arm_target_label = CODE_LABEL_NUMBER (label);
22737 else
22739 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22741 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22743 this_insn = next_nonnote_insn (this_insn);
22744 gcc_assert (!this_insn
22745 || (!BARRIER_P (this_insn)
22746 && !LABEL_P (this_insn)));
22748 if (!this_insn)
22750 /* Oh, dear! We ran off the end... give up. */
22751 extract_constrain_insn_cached (insn);
22752 arm_ccfsm_state = 0;
22753 arm_target_insn = NULL;
22754 return;
22756 arm_target_insn = this_insn;
22759 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22760 what it was. */
22761 if (!reverse)
22762 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22764 if (reverse || then_not_else)
22765 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22768 /* Restore recog_data (getting the attributes of other insns can
22769 destroy this array, but final.c assumes that it remains intact
22770 across this call). */
22771 extract_constrain_insn_cached (insn);
22775 /* Output IT instructions. */
22776 void
22777 thumb2_asm_output_opcode (FILE * stream)
22779 char buff[5];
22780 int n;
22782 if (arm_condexec_mask)
22784 for (n = 0; n < arm_condexec_masklen; n++)
22785 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22786 buff[n] = 0;
22787 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22788 arm_condition_codes[arm_current_cc]);
22789 arm_condexec_mask = 0;
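/* Example (a sketch): with arm_condexec_mask == 0x3, arm_condexec_masklen
   == 2 and arm_current_cc == ARM_EQ, the code above prints "itt\teq"
   before the first of the two conditionally executed instructions.  */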
22793 /* Returns true if REGNO is a valid register
22794 for holding a quantity of type MODE. */
22796 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22798 if (GET_MODE_CLASS (mode) == MODE_CC)
22799 return (regno == CC_REGNUM
22800 || (TARGET_HARD_FLOAT && TARGET_VFP
22801 && regno == VFPCC_REGNUM));
22803 if (TARGET_THUMB1)
22804 /* For the Thumb we only allow values bigger than SImode in
22805 registers 0 - 6, so that there is always a second low
22806 register available to hold the upper part of the value.
22807 We probably ought to ensure that the register is the
22808 start of an even numbered register pair. */
22809 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
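/* For example (assuming LAST_LO_REGNUM corresponds to r7): a DImode
   value is accepted starting at r6, where it occupies the low registers
   r6 and r7, but rejected starting at r7, where its upper half would
   spill into a high register.  */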
22811 if (TARGET_HARD_FLOAT && TARGET_VFP
22812 && IS_VFP_REGNUM (regno))
22814 if (mode == SFmode || mode == SImode)
22815 return VFP_REGNO_OK_FOR_SINGLE (regno);
22817 if (mode == DFmode)
22818 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22820 /* VFP registers can hold HFmode values, but there is no point in
22821 putting them there unless we have hardware conversion insns. */
22822 if (mode == HFmode)
22823 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22825 if (TARGET_NEON)
22826 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22827 || (VALID_NEON_QREG_MODE (mode)
22828 && NEON_REGNO_OK_FOR_QUAD (regno))
22829 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22830 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22831 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22832 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22833 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22835 return FALSE;
22838 if (TARGET_REALLY_IWMMXT)
22840 if (IS_IWMMXT_GR_REGNUM (regno))
22841 return mode == SImode;
22843 if (IS_IWMMXT_REGNUM (regno))
22844 return VALID_IWMMXT_REG_MODE (mode);
22847 /* We allow almost any value to be stored in the general registers.
22848 Restrict doubleword quantities to even register pairs in ARM state
22849 so that we can use ldrd. Do not allow very large Neon structure
22850 opaque modes in general registers; they would use too many. */
22851 if (regno <= LAST_ARM_REGNUM)
22853 if (ARM_NUM_REGS (mode) > 4)
22854 return FALSE;
22856 if (TARGET_THUMB2)
22857 return TRUE;
22859 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22862 if (regno == FRAME_POINTER_REGNUM
22863 || regno == ARG_POINTER_REGNUM)
22864 /* We only allow integers in the fake hard registers. */
22865 return GET_MODE_CLASS (mode) == MODE_INT;
22867 return FALSE;
22870 /* Implement MODES_TIEABLE_P. */
22872 bool
22873 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22875 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22876 return true;
22878 /* We specifically want to allow elements of "structure" modes to
22879 be tieable to the structure. This more general condition allows
22880 other rarer situations too. */
22881 if (TARGET_NEON
22882 && (VALID_NEON_DREG_MODE (mode1)
22883 || VALID_NEON_QREG_MODE (mode1)
22884 || VALID_NEON_STRUCT_MODE (mode1))
22885 && (VALID_NEON_DREG_MODE (mode2)
22886 || VALID_NEON_QREG_MODE (mode2)
22887 || VALID_NEON_STRUCT_MODE (mode2)))
22888 return true;
22890 return false;
22893 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22894 not used in arm mode. */
22896 enum reg_class
22897 arm_regno_class (int regno)
22899 if (TARGET_THUMB1)
22901 if (regno == STACK_POINTER_REGNUM)
22902 return STACK_REG;
22903 if (regno == CC_REGNUM)
22904 return CC_REG;
22905 if (regno < 8)
22906 return LO_REGS;
22907 return HI_REGS;
22910 if (TARGET_THUMB2 && regno < 8)
22911 return LO_REGS;
22913 if ( regno <= LAST_ARM_REGNUM
22914 || regno == FRAME_POINTER_REGNUM
22915 || regno == ARG_POINTER_REGNUM)
22916 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22918 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22919 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22921 if (IS_VFP_REGNUM (regno))
22923 if (regno <= D7_VFP_REGNUM)
22924 return VFP_D0_D7_REGS;
22925 else if (regno <= LAST_LO_VFP_REGNUM)
22926 return VFP_LO_REGS;
22927 else
22928 return VFP_HI_REGS;
22931 if (IS_IWMMXT_REGNUM (regno))
22932 return IWMMXT_REGS;
22934 if (IS_IWMMXT_GR_REGNUM (regno))
22935 return IWMMXT_GR_REGS;
22937 return NO_REGS;
22940 /* Handle a special case when computing the offset
22941 of an argument from the frame pointer. */
22943 arm_debugger_arg_offset (int value, rtx addr)
22945 rtx insn;
22947 /* We are only interested if dbxout_parms() failed to compute the offset. */
22948 if (value != 0)
22949 return 0;
22951 /* We can only cope with the case where the address is held in a register. */
22952 if (!REG_P (addr))
22953 return 0;
22955 /* If we are using the frame pointer to point at the argument, then
22956 an offset of 0 is correct. */
22957 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22958 return 0;
22960 /* If we are using the stack pointer to point at the
22961 argument, then an offset of 0 is correct. */
22962 /* ??? Check this is consistent with thumb2 frame layout. */
22963 if ((TARGET_THUMB || !frame_pointer_needed)
22964 && REGNO (addr) == SP_REGNUM)
22965 return 0;
22967 /* Oh dear. The argument is pointed to by a register rather
22968 than being held in a register, or being stored at a known
22969 offset from the frame pointer. Since GDB only understands
22970 those two kinds of argument we must translate the address
22971 held in the register into an offset from the frame pointer.
22972 We do this by searching through the insns for the function
22973 looking to see where this register gets its value. If the
22974 register is initialized from the frame pointer plus an offset
22975 then we are in luck and we can continue, otherwise we give up.
22977 This code is exercised by producing debugging information
22978 for a function with arguments like this:
22980 double func (double a, double b, int c, double d) {return d;}
22982 Without this code the stab for parameter 'd' will be set to
22983 an offset of 0 from the frame pointer, rather than 8. */
22985 /* The if() statement says:
22987 If the insn is a normal instruction
22988 and if the insn is setting the value in a register
22989 and if the register being set is the register holding the address of the argument
22990 and if the address is computed by an addition
22991 that involves adding to a register
22992 which is the frame pointer
22993 a constant integer
22995 then... */
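/* In RTL terms the loop below is looking for an insn of roughly this
   shape (an illustrative sketch, not a literal match pattern):

     (set (reg Rn)
	  (plus (reg hard-frame-pointer) (const_int 8)))

   where Rn is the register holding the argument's address; the
   const_int then becomes the offset reported to the debugger.  */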
22997 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22999 if ( NONJUMP_INSN_P (insn)
23000 && GET_CODE (PATTERN (insn)) == SET
23001 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23002 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23003 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23004 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23005 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23008 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23010 break;
23014 if (value == 0)
23016 debug_rtx (addr);
23017 warning (0, "unable to compute real location of stacked parameter");
23018 value = 8; /* XXX magic hack */
23021 return value;
23024 typedef enum {
23025 T_V8QI,
23026 T_V4HI,
23027 T_V4HF,
23028 T_V2SI,
23029 T_V2SF,
23030 T_DI,
23031 T_V16QI,
23032 T_V8HI,
23033 T_V4SI,
23034 T_V4SF,
23035 T_V2DI,
23036 T_TI,
23037 T_EI,
23038 T_OI,
23039 T_MAX /* Size of enum. Keep last. */
23040 } neon_builtin_type_mode;
23042 #define TYPE_MODE_BIT(X) (1 << (X))
23044 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23045 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23046 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23047 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23048 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23049 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23051 #define v8qi_UP T_V8QI
23052 #define v4hi_UP T_V4HI
23053 #define v4hf_UP T_V4HF
23054 #define v2si_UP T_V2SI
23055 #define v2sf_UP T_V2SF
23056 #define di_UP T_DI
23057 #define v16qi_UP T_V16QI
23058 #define v8hi_UP T_V8HI
23059 #define v4si_UP T_V4SI
23060 #define v4sf_UP T_V4SF
23061 #define v2di_UP T_V2DI
23062 #define ti_UP T_TI
23063 #define ei_UP T_EI
23064 #define oi_UP T_OI
23066 #define UP(X) X##_UP
23068 typedef enum {
23069 NEON_BINOP,
23070 NEON_TERNOP,
23071 NEON_UNOP,
23072 NEON_GETLANE,
23073 NEON_SETLANE,
23074 NEON_CREATE,
23075 NEON_RINT,
23076 NEON_DUP,
23077 NEON_DUPLANE,
23078 NEON_COMBINE,
23079 NEON_SPLIT,
23080 NEON_LANEMUL,
23081 NEON_LANEMULL,
23082 NEON_LANEMULH,
23083 NEON_LANEMAC,
23084 NEON_SCALARMUL,
23085 NEON_SCALARMULL,
23086 NEON_SCALARMULH,
23087 NEON_SCALARMAC,
23088 NEON_CONVERT,
23089 NEON_FLOAT_WIDEN,
23090 NEON_FLOAT_NARROW,
23091 NEON_FIXCONV,
23092 NEON_SELECT,
23093 NEON_RESULTPAIR,
23094 NEON_REINTERP,
23095 NEON_VTBL,
23096 NEON_VTBX,
23097 NEON_LOAD1,
23098 NEON_LOAD1LANE,
23099 NEON_STORE1,
23100 NEON_STORE1LANE,
23101 NEON_LOADSTRUCT,
23102 NEON_LOADSTRUCTLANE,
23103 NEON_STORESTRUCT,
23104 NEON_STORESTRUCTLANE,
23105 NEON_LOGICBINOP,
23106 NEON_SHIFTINSERT,
23107 NEON_SHIFTIMM,
23108 NEON_SHIFTACC
23109 } neon_itype;
23111 typedef struct {
23112 const char *name;
23113 const neon_itype itype;
23114 const neon_builtin_type_mode mode;
23115 const enum insn_code code;
23116 unsigned int fcode;
23117 } neon_builtin_datum;
23119 #define CF(N,X) CODE_FOR_neon_##N##X
23121 #define VAR1(T, N, A) \
23122 {#N, NEON_##T, UP (A), CF (N, A), 0}
23123 #define VAR2(T, N, A, B) \
23124 VAR1 (T, N, A), \
23125 {#N, NEON_##T, UP (B), CF (N, B), 0}
23126 #define VAR3(T, N, A, B, C) \
23127 VAR2 (T, N, A, B), \
23128 {#N, NEON_##T, UP (C), CF (N, C), 0}
23129 #define VAR4(T, N, A, B, C, D) \
23130 VAR3 (T, N, A, B, C), \
23131 {#N, NEON_##T, UP (D), CF (N, D), 0}
23132 #define VAR5(T, N, A, B, C, D, E) \
23133 VAR4 (T, N, A, B, C, D), \
23134 {#N, NEON_##T, UP (E), CF (N, E), 0}
23135 #define VAR6(T, N, A, B, C, D, E, F) \
23136 VAR5 (T, N, A, B, C, D, E), \
23137 {#N, NEON_##T, UP (F), CF (N, F), 0}
23138 #define VAR7(T, N, A, B, C, D, E, F, G) \
23139 VAR6 (T, N, A, B, C, D, E, F), \
23140 {#N, NEON_##T, UP (G), CF (N, G), 0}
23141 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23142 VAR7 (T, N, A, B, C, D, E, F, G), \
23143 {#N, NEON_##T, UP (H), CF (N, H), 0}
23144 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23145 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23146 {#N, NEON_##T, UP (I), CF (N, I), 0}
23147 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23148 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23149 {#N, NEON_##T, UP (J), CF (N, J), 0}
23151 /* The NEON builtin data can be found in arm_neon_builtins.def.
23152 The mode entries in the following table correspond to the "key" type of the
23153 instruction variant, i.e. equivalent to that which would be specified after
23154 the assembler mnemonic, which usually refers to the last vector operand.
23155 (Signed/unsigned/polynomial types are not differentiated between though, and
23156 are all mapped onto the same mode for a given element size.) The modes
23157 listed per instruction should be the same as those defined for that
23158 instruction's pattern in neon.md. */
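/* For example, a (hypothetical) entry such as

     VAR2 (BINOP, vadd, v8qi, v16qi)

   would expand, via the macros above, into two table entries:

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}

   with the fcode field presumably filled in later, when the builtins
   are registered.  */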
23160 static neon_builtin_datum neon_builtin_data[] =
23162 #include "arm_neon_builtins.def"
23165 #undef CF
23166 #undef VAR1
23167 #undef VAR2
23168 #undef VAR3
23169 #undef VAR4
23170 #undef VAR5
23171 #undef VAR6
23172 #undef VAR7
23173 #undef VAR8
23174 #undef VAR9
23175 #undef VAR10
23177 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23178 #define VAR1(T, N, A) \
23179 CF (N, A)
23180 #define VAR2(T, N, A, B) \
23181 VAR1 (T, N, A), \
23182 CF (N, B)
23183 #define VAR3(T, N, A, B, C) \
23184 VAR2 (T, N, A, B), \
23185 CF (N, C)
23186 #define VAR4(T, N, A, B, C, D) \
23187 VAR3 (T, N, A, B, C), \
23188 CF (N, D)
23189 #define VAR5(T, N, A, B, C, D, E) \
23190 VAR4 (T, N, A, B, C, D), \
23191 CF (N, E)
23192 #define VAR6(T, N, A, B, C, D, E, F) \
23193 VAR5 (T, N, A, B, C, D, E), \
23194 CF (N, F)
23195 #define VAR7(T, N, A, B, C, D, E, F, G) \
23196 VAR6 (T, N, A, B, C, D, E, F), \
23197 CF (N, G)
23198 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23199 VAR7 (T, N, A, B, C, D, E, F, G), \
23200 CF (N, H)
23201 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23202 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23203 CF (N, I)
23204 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23205 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23206 CF (N, J)
23207 enum arm_builtins
23209 ARM_BUILTIN_GETWCGR0,
23210 ARM_BUILTIN_GETWCGR1,
23211 ARM_BUILTIN_GETWCGR2,
23212 ARM_BUILTIN_GETWCGR3,
23214 ARM_BUILTIN_SETWCGR0,
23215 ARM_BUILTIN_SETWCGR1,
23216 ARM_BUILTIN_SETWCGR2,
23217 ARM_BUILTIN_SETWCGR3,
23219 ARM_BUILTIN_WZERO,
23221 ARM_BUILTIN_WAVG2BR,
23222 ARM_BUILTIN_WAVG2HR,
23223 ARM_BUILTIN_WAVG2B,
23224 ARM_BUILTIN_WAVG2H,
23226 ARM_BUILTIN_WACCB,
23227 ARM_BUILTIN_WACCH,
23228 ARM_BUILTIN_WACCW,
23230 ARM_BUILTIN_WMACS,
23231 ARM_BUILTIN_WMACSZ,
23232 ARM_BUILTIN_WMACU,
23233 ARM_BUILTIN_WMACUZ,
23235 ARM_BUILTIN_WSADB,
23236 ARM_BUILTIN_WSADBZ,
23237 ARM_BUILTIN_WSADH,
23238 ARM_BUILTIN_WSADHZ,
23240 ARM_BUILTIN_WALIGNI,
23241 ARM_BUILTIN_WALIGNR0,
23242 ARM_BUILTIN_WALIGNR1,
23243 ARM_BUILTIN_WALIGNR2,
23244 ARM_BUILTIN_WALIGNR3,
23246 ARM_BUILTIN_TMIA,
23247 ARM_BUILTIN_TMIAPH,
23248 ARM_BUILTIN_TMIABB,
23249 ARM_BUILTIN_TMIABT,
23250 ARM_BUILTIN_TMIATB,
23251 ARM_BUILTIN_TMIATT,
23253 ARM_BUILTIN_TMOVMSKB,
23254 ARM_BUILTIN_TMOVMSKH,
23255 ARM_BUILTIN_TMOVMSKW,
23257 ARM_BUILTIN_TBCSTB,
23258 ARM_BUILTIN_TBCSTH,
23259 ARM_BUILTIN_TBCSTW,
23261 ARM_BUILTIN_WMADDS,
23262 ARM_BUILTIN_WMADDU,
23264 ARM_BUILTIN_WPACKHSS,
23265 ARM_BUILTIN_WPACKWSS,
23266 ARM_BUILTIN_WPACKDSS,
23267 ARM_BUILTIN_WPACKHUS,
23268 ARM_BUILTIN_WPACKWUS,
23269 ARM_BUILTIN_WPACKDUS,
23271 ARM_BUILTIN_WADDB,
23272 ARM_BUILTIN_WADDH,
23273 ARM_BUILTIN_WADDW,
23274 ARM_BUILTIN_WADDSSB,
23275 ARM_BUILTIN_WADDSSH,
23276 ARM_BUILTIN_WADDSSW,
23277 ARM_BUILTIN_WADDUSB,
23278 ARM_BUILTIN_WADDUSH,
23279 ARM_BUILTIN_WADDUSW,
23280 ARM_BUILTIN_WSUBB,
23281 ARM_BUILTIN_WSUBH,
23282 ARM_BUILTIN_WSUBW,
23283 ARM_BUILTIN_WSUBSSB,
23284 ARM_BUILTIN_WSUBSSH,
23285 ARM_BUILTIN_WSUBSSW,
23286 ARM_BUILTIN_WSUBUSB,
23287 ARM_BUILTIN_WSUBUSH,
23288 ARM_BUILTIN_WSUBUSW,
23290 ARM_BUILTIN_WAND,
23291 ARM_BUILTIN_WANDN,
23292 ARM_BUILTIN_WOR,
23293 ARM_BUILTIN_WXOR,
23295 ARM_BUILTIN_WCMPEQB,
23296 ARM_BUILTIN_WCMPEQH,
23297 ARM_BUILTIN_WCMPEQW,
23298 ARM_BUILTIN_WCMPGTUB,
23299 ARM_BUILTIN_WCMPGTUH,
23300 ARM_BUILTIN_WCMPGTUW,
23301 ARM_BUILTIN_WCMPGTSB,
23302 ARM_BUILTIN_WCMPGTSH,
23303 ARM_BUILTIN_WCMPGTSW,
23305 ARM_BUILTIN_TEXTRMSB,
23306 ARM_BUILTIN_TEXTRMSH,
23307 ARM_BUILTIN_TEXTRMSW,
23308 ARM_BUILTIN_TEXTRMUB,
23309 ARM_BUILTIN_TEXTRMUH,
23310 ARM_BUILTIN_TEXTRMUW,
23311 ARM_BUILTIN_TINSRB,
23312 ARM_BUILTIN_TINSRH,
23313 ARM_BUILTIN_TINSRW,
23315 ARM_BUILTIN_WMAXSW,
23316 ARM_BUILTIN_WMAXSH,
23317 ARM_BUILTIN_WMAXSB,
23318 ARM_BUILTIN_WMAXUW,
23319 ARM_BUILTIN_WMAXUH,
23320 ARM_BUILTIN_WMAXUB,
23321 ARM_BUILTIN_WMINSW,
23322 ARM_BUILTIN_WMINSH,
23323 ARM_BUILTIN_WMINSB,
23324 ARM_BUILTIN_WMINUW,
23325 ARM_BUILTIN_WMINUH,
23326 ARM_BUILTIN_WMINUB,
23328 ARM_BUILTIN_WMULUM,
23329 ARM_BUILTIN_WMULSM,
23330 ARM_BUILTIN_WMULUL,
23332 ARM_BUILTIN_PSADBH,
23333 ARM_BUILTIN_WSHUFH,
23335 ARM_BUILTIN_WSLLH,
23336 ARM_BUILTIN_WSLLW,
23337 ARM_BUILTIN_WSLLD,
23338 ARM_BUILTIN_WSRAH,
23339 ARM_BUILTIN_WSRAW,
23340 ARM_BUILTIN_WSRAD,
23341 ARM_BUILTIN_WSRLH,
23342 ARM_BUILTIN_WSRLW,
23343 ARM_BUILTIN_WSRLD,
23344 ARM_BUILTIN_WRORH,
23345 ARM_BUILTIN_WRORW,
23346 ARM_BUILTIN_WRORD,
23347 ARM_BUILTIN_WSLLHI,
23348 ARM_BUILTIN_WSLLWI,
23349 ARM_BUILTIN_WSLLDI,
23350 ARM_BUILTIN_WSRAHI,
23351 ARM_BUILTIN_WSRAWI,
23352 ARM_BUILTIN_WSRADI,
23353 ARM_BUILTIN_WSRLHI,
23354 ARM_BUILTIN_WSRLWI,
23355 ARM_BUILTIN_WSRLDI,
23356 ARM_BUILTIN_WRORHI,
23357 ARM_BUILTIN_WRORWI,
23358 ARM_BUILTIN_WRORDI,
23360 ARM_BUILTIN_WUNPCKIHB,
23361 ARM_BUILTIN_WUNPCKIHH,
23362 ARM_BUILTIN_WUNPCKIHW,
23363 ARM_BUILTIN_WUNPCKILB,
23364 ARM_BUILTIN_WUNPCKILH,
23365 ARM_BUILTIN_WUNPCKILW,
23367 ARM_BUILTIN_WUNPCKEHSB,
23368 ARM_BUILTIN_WUNPCKEHSH,
23369 ARM_BUILTIN_WUNPCKEHSW,
23370 ARM_BUILTIN_WUNPCKEHUB,
23371 ARM_BUILTIN_WUNPCKEHUH,
23372 ARM_BUILTIN_WUNPCKEHUW,
23373 ARM_BUILTIN_WUNPCKELSB,
23374 ARM_BUILTIN_WUNPCKELSH,
23375 ARM_BUILTIN_WUNPCKELSW,
23376 ARM_BUILTIN_WUNPCKELUB,
23377 ARM_BUILTIN_WUNPCKELUH,
23378 ARM_BUILTIN_WUNPCKELUW,
23380 ARM_BUILTIN_WABSB,
23381 ARM_BUILTIN_WABSH,
23382 ARM_BUILTIN_WABSW,
23384 ARM_BUILTIN_WADDSUBHX,
23385 ARM_BUILTIN_WSUBADDHX,
23387 ARM_BUILTIN_WABSDIFFB,
23388 ARM_BUILTIN_WABSDIFFH,
23389 ARM_BUILTIN_WABSDIFFW,
23391 ARM_BUILTIN_WADDCH,
23392 ARM_BUILTIN_WADDCW,
23394 ARM_BUILTIN_WAVG4,
23395 ARM_BUILTIN_WAVG4R,
23397 ARM_BUILTIN_WMADDSX,
23398 ARM_BUILTIN_WMADDUX,
23400 ARM_BUILTIN_WMADDSN,
23401 ARM_BUILTIN_WMADDUN,
23403 ARM_BUILTIN_WMULWSM,
23404 ARM_BUILTIN_WMULWUM,
23406 ARM_BUILTIN_WMULWSMR,
23407 ARM_BUILTIN_WMULWUMR,
23409 ARM_BUILTIN_WMULWL,
23411 ARM_BUILTIN_WMULSMR,
23412 ARM_BUILTIN_WMULUMR,
23414 ARM_BUILTIN_WQMULM,
23415 ARM_BUILTIN_WQMULMR,
23417 ARM_BUILTIN_WQMULWM,
23418 ARM_BUILTIN_WQMULWMR,
23420 ARM_BUILTIN_WADDBHUSM,
23421 ARM_BUILTIN_WADDBHUSL,
23423 ARM_BUILTIN_WQMIABB,
23424 ARM_BUILTIN_WQMIABT,
23425 ARM_BUILTIN_WQMIATB,
23426 ARM_BUILTIN_WQMIATT,
23428 ARM_BUILTIN_WQMIABBN,
23429 ARM_BUILTIN_WQMIABTN,
23430 ARM_BUILTIN_WQMIATBN,
23431 ARM_BUILTIN_WQMIATTN,
23433 ARM_BUILTIN_WMIABB,
23434 ARM_BUILTIN_WMIABT,
23435 ARM_BUILTIN_WMIATB,
23436 ARM_BUILTIN_WMIATT,
23438 ARM_BUILTIN_WMIABBN,
23439 ARM_BUILTIN_WMIABTN,
23440 ARM_BUILTIN_WMIATBN,
23441 ARM_BUILTIN_WMIATTN,
23443 ARM_BUILTIN_WMIAWBB,
23444 ARM_BUILTIN_WMIAWBT,
23445 ARM_BUILTIN_WMIAWTB,
23446 ARM_BUILTIN_WMIAWTT,
23448 ARM_BUILTIN_WMIAWBBN,
23449 ARM_BUILTIN_WMIAWBTN,
23450 ARM_BUILTIN_WMIAWTBN,
23451 ARM_BUILTIN_WMIAWTTN,
23453 ARM_BUILTIN_WMERGE,
23455 ARM_BUILTIN_CRC32B,
23456 ARM_BUILTIN_CRC32H,
23457 ARM_BUILTIN_CRC32W,
23458 ARM_BUILTIN_CRC32CB,
23459 ARM_BUILTIN_CRC32CH,
23460 ARM_BUILTIN_CRC32CW,
23462 #undef CRYPTO1
23463 #undef CRYPTO2
23464 #undef CRYPTO3
23466 #define CRYPTO1(L, U, M1, M2) \
23467 ARM_BUILTIN_CRYPTO_##U,
23468 #define CRYPTO2(L, U, M1, M2, M3) \
23469 ARM_BUILTIN_CRYPTO_##U,
23470 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23471 ARM_BUILTIN_CRYPTO_##U,
23473 #include "crypto.def"
23475 #undef CRYPTO1
23476 #undef CRYPTO2
23477 #undef CRYPTO3
23479 #include "arm_neon_builtins.def"
23481 ,ARM_BUILTIN_MAX
23484 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23486 #undef CF
23487 #undef VAR1
23488 #undef VAR2
23489 #undef VAR3
23490 #undef VAR4
23491 #undef VAR5
23492 #undef VAR6
23493 #undef VAR7
23494 #undef VAR8
23495 #undef VAR9
23496 #undef VAR10
23498 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23500 #define NUM_DREG_TYPES 5
23501 #define NUM_QREG_TYPES 6
23503 static void
23504 arm_init_neon_builtins (void)
23506 unsigned int i, fcode;
23507 tree decl;
23509 tree neon_intQI_type_node;
23510 tree neon_intHI_type_node;
23511 tree neon_floatHF_type_node;
23512 tree neon_polyQI_type_node;
23513 tree neon_polyHI_type_node;
23514 tree neon_intSI_type_node;
23515 tree neon_intDI_type_node;
23516 tree neon_intUTI_type_node;
23517 tree neon_float_type_node;
23519 tree intQI_pointer_node;
23520 tree intHI_pointer_node;
23521 tree intSI_pointer_node;
23522 tree intDI_pointer_node;
23523 tree float_pointer_node;
23525 tree const_intQI_node;
23526 tree const_intHI_node;
23527 tree const_intSI_node;
23528 tree const_intDI_node;
23529 tree const_float_node;
23531 tree const_intQI_pointer_node;
23532 tree const_intHI_pointer_node;
23533 tree const_intSI_pointer_node;
23534 tree const_intDI_pointer_node;
23535 tree const_float_pointer_node;
23537 tree V8QI_type_node;
23538 tree V4HI_type_node;
23539 tree V4HF_type_node;
23540 tree V2SI_type_node;
23541 tree V2SF_type_node;
23542 tree V16QI_type_node;
23543 tree V8HI_type_node;
23544 tree V4SI_type_node;
23545 tree V4SF_type_node;
23546 tree V2DI_type_node;
23548 tree intUQI_type_node;
23549 tree intUHI_type_node;
23550 tree intUSI_type_node;
23551 tree intUDI_type_node;
23553 tree intEI_type_node;
23554 tree intOI_type_node;
23555 tree intCI_type_node;
23556 tree intXI_type_node;
23558 tree V8QI_pointer_node;
23559 tree V4HI_pointer_node;
23560 tree V2SI_pointer_node;
23561 tree V2SF_pointer_node;
23562 tree V16QI_pointer_node;
23563 tree V8HI_pointer_node;
23564 tree V4SI_pointer_node;
23565 tree V4SF_pointer_node;
23566 tree V2DI_pointer_node;
23568 tree void_ftype_pv8qi_v8qi_v8qi;
23569 tree void_ftype_pv4hi_v4hi_v4hi;
23570 tree void_ftype_pv2si_v2si_v2si;
23571 tree void_ftype_pv2sf_v2sf_v2sf;
23572 tree void_ftype_pdi_di_di;
23573 tree void_ftype_pv16qi_v16qi_v16qi;
23574 tree void_ftype_pv8hi_v8hi_v8hi;
23575 tree void_ftype_pv4si_v4si_v4si;
23576 tree void_ftype_pv4sf_v4sf_v4sf;
23577 tree void_ftype_pv2di_v2di_v2di;
23579 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23580 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23581 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23583 /* Create distinguished type nodes for NEON vector element types,
23584 and pointers to values of such types, so we can detect them later. */
23585 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23586 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23587 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23588 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23589 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23590 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23591 neon_float_type_node = make_node (REAL_TYPE);
23592 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23593 layout_type (neon_float_type_node);
23594 neon_floatHF_type_node = make_node (REAL_TYPE);
23595 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23596 layout_type (neon_floatHF_type_node);
23598 /* Define typedefs which exactly correspond to the modes we are basing vector
23599 types on. If you change these names you'll need to change
23600 the table used by arm_mangle_type too. */
23601 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23602 "__builtin_neon_qi");
23603 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23604 "__builtin_neon_hi");
23605 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23606 "__builtin_neon_hf");
23607 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23608 "__builtin_neon_si");
23609 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23610 "__builtin_neon_sf");
23611 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23612 "__builtin_neon_di");
23613 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23614 "__builtin_neon_poly8");
23615 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23616 "__builtin_neon_poly16");
23618 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23619 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23620 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23621 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23622 float_pointer_node = build_pointer_type (neon_float_type_node);
23624 /* Next create constant-qualified versions of the above types. */
23625 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23626 TYPE_QUAL_CONST);
23627 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23628 TYPE_QUAL_CONST);
23629 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23630 TYPE_QUAL_CONST);
23631 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23632 TYPE_QUAL_CONST);
23633 const_float_node = build_qualified_type (neon_float_type_node,
23634 TYPE_QUAL_CONST);
23636 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23637 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23638 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23639 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23640 const_float_pointer_node = build_pointer_type (const_float_node);
23642 /* Now create vector types based on our NEON element types. */
23643 /* 64-bit vectors. */
23644 V8QI_type_node =
23645 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23646 V4HI_type_node =
23647 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23648 V4HF_type_node =
23649 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23650 V2SI_type_node =
23651 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23652 V2SF_type_node =
23653 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23654 /* 128-bit vectors. */
23655 V16QI_type_node =
23656 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23657 V8HI_type_node =
23658 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23659 V4SI_type_node =
23660 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23661 V4SF_type_node =
23662 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23663 V2DI_type_node =
23664 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23666 /* Unsigned integer types for various mode sizes. */
23667 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23668 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23669 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23670 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23671 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23674 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23675 "__builtin_neon_uqi");
23676 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23677 "__builtin_neon_uhi");
23678 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23679 "__builtin_neon_usi");
23680 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23681 "__builtin_neon_udi");
23682 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23683 "__builtin_neon_poly64");
23684 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23685 "__builtin_neon_poly128");
23687 /* Opaque integer types for structures of vectors. */
23688 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23689 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23690 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23691 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23693 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23694 "__builtin_neon_ti");
23695 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23696 "__builtin_neon_ei");
23697 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23698 "__builtin_neon_oi");
23699 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23700 "__builtin_neon_ci");
23701 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23702 "__builtin_neon_xi");
23704 /* Pointers to vector types. */
23705 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23706 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23707 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23708 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23709 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23710 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23711 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23712 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23713 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23715 /* Operations which return results as pairs. */
23716 void_ftype_pv8qi_v8qi_v8qi =
23717 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23718 V8QI_type_node, NULL);
23719 void_ftype_pv4hi_v4hi_v4hi =
23720 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23721 V4HI_type_node, NULL);
23722 void_ftype_pv2si_v2si_v2si =
23723 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23724 V2SI_type_node, NULL);
23725 void_ftype_pv2sf_v2sf_v2sf =
23726 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23727 V2SF_type_node, NULL);
23728 void_ftype_pdi_di_di =
23729 build_function_type_list (void_type_node, intDI_pointer_node,
23730 neon_intDI_type_node, neon_intDI_type_node, NULL);
23731 void_ftype_pv16qi_v16qi_v16qi =
23732 build_function_type_list (void_type_node, V16QI_pointer_node,
23733 V16QI_type_node, V16QI_type_node, NULL);
23734 void_ftype_pv8hi_v8hi_v8hi =
23735 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23736 V8HI_type_node, NULL);
23737 void_ftype_pv4si_v4si_v4si =
23738 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23739 V4SI_type_node, NULL);
23740 void_ftype_pv4sf_v4sf_v4sf =
23741 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23742 V4SF_type_node, NULL);
23743 void_ftype_pv2di_v2di_v2di =
23744 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23745 V2DI_type_node, NULL);
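/* Sketch of how a NEON_RESULTPAIR builtin is meant to be called (the
   builtin name below is only an assumed example): both result vectors are
   stored through the pointer passed as the first argument, matching the
   void (T *, T, T) signatures built above:

     int8x8_t res[2];
     __builtin_neon_vtrnv8qi (res, a, b);
*/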
23747 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23749 tree V4USI_type_node =
23750 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23752 tree V16UQI_type_node =
23753 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23755 tree v16uqi_ftype_v16uqi
23756 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23758 tree v16uqi_ftype_v16uqi_v16uqi
23759 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23760 V16UQI_type_node, NULL_TREE);
23762 tree v4usi_ftype_v4usi
23763 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23765 tree v4usi_ftype_v4usi_v4usi
23766 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23767 V4USI_type_node, NULL_TREE);
23769 tree v4usi_ftype_v4usi_v4usi_v4usi
23770 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23771 V4USI_type_node, V4USI_type_node, NULL_TREE);
23773 tree uti_ftype_udi_udi
23774 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23775 intUDI_type_node, NULL_TREE);
23777 #undef CRYPTO1
23778 #undef CRYPTO2
23779 #undef CRYPTO3
23780 #undef C
23781 #undef N
23782 #undef CF
23783 #undef FT1
23784 #undef FT2
23785 #undef FT3
23787 #define C(U) \
23788 ARM_BUILTIN_CRYPTO_##U
23789 #define N(L) \
23790 "__builtin_arm_crypto_"#L
23791 #define FT1(R, A) \
23792 R##_ftype_##A
23793 #define FT2(R, A1, A2) \
23794 R##_ftype_##A1##_##A2
23795 #define FT3(R, A1, A2, A3) \
23796 R##_ftype_##A1##_##A2##_##A3
23797 #define CRYPTO1(L, U, R, A) \
23798 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23799 C (U), BUILT_IN_MD, \
23800 NULL, NULL_TREE);
23801 #define CRYPTO2(L, U, R, A1, A2) \
23802 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23803 C (U), BUILT_IN_MD, \
23804 NULL, NULL_TREE);
23806 #define CRYPTO3(L, U, R, A1, A2, A3) \
23807 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23808 C (U), BUILT_IN_MD, \
23809 NULL, NULL_TREE);
23810 #include "crypto.def"
23812 #undef CRYPTO1
23813 #undef CRYPTO2
23814 #undef CRYPTO3
23815 #undef C
23816 #undef N
23817 #undef FT1
23818 #undef FT2
23819 #undef FT3
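/* Worked example of the macro scheme above.  Assuming crypto.def contains an
   entry of the shape

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   the CRYPTO2/N/FT2/C definitions expand it to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);  */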
23821 dreg_types[0] = V8QI_type_node;
23822 dreg_types[1] = V4HI_type_node;
23823 dreg_types[2] = V2SI_type_node;
23824 dreg_types[3] = V2SF_type_node;
23825 dreg_types[4] = neon_intDI_type_node;
23827 qreg_types[0] = V16QI_type_node;
23828 qreg_types[1] = V8HI_type_node;
23829 qreg_types[2] = V4SI_type_node;
23830 qreg_types[3] = V4SF_type_node;
23831 qreg_types[4] = V2DI_type_node;
23832 qreg_types[5] = neon_intUTI_type_node;
23834 for (i = 0; i < NUM_QREG_TYPES; i++)
23836 int j;
23837 for (j = 0; j < NUM_QREG_TYPES; j++)
23839 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23840 reinterp_ftype_dreg[i][j]
23841 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23843 reinterp_ftype_qreg[i][j]
23844 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23848 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23849 i < ARRAY_SIZE (neon_builtin_data);
23850 i++, fcode++)
23852 neon_builtin_datum *d = &neon_builtin_data[i];
23854 const char* const modenames[] = {
23855 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23856 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23857 "ti", "ei", "oi"
23859 char namebuf[60];
23860 tree ftype = NULL;
23861 int is_load = 0, is_store = 0;
23863 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23865 d->fcode = fcode;
23867 switch (d->itype)
23869 case NEON_LOAD1:
23870 case NEON_LOAD1LANE:
23871 case NEON_LOADSTRUCT:
23872 case NEON_LOADSTRUCTLANE:
23873 is_load = 1;
23874 /* Fall through. */
23875 case NEON_STORE1:
23876 case NEON_STORE1LANE:
23877 case NEON_STORESTRUCT:
23878 case NEON_STORESTRUCTLANE:
23879 if (!is_load)
23880 is_store = 1;
23881 /* Fall through. */
23882 case NEON_UNOP:
23883 case NEON_RINT:
23884 case NEON_BINOP:
23885 case NEON_LOGICBINOP:
23886 case NEON_SHIFTINSERT:
23887 case NEON_TERNOP:
23888 case NEON_GETLANE:
23889 case NEON_SETLANE:
23890 case NEON_CREATE:
23891 case NEON_DUP:
23892 case NEON_DUPLANE:
23893 case NEON_SHIFTIMM:
23894 case NEON_SHIFTACC:
23895 case NEON_COMBINE:
23896 case NEON_SPLIT:
23897 case NEON_CONVERT:
23898 case NEON_FIXCONV:
23899 case NEON_LANEMUL:
23900 case NEON_LANEMULL:
23901 case NEON_LANEMULH:
23902 case NEON_LANEMAC:
23903 case NEON_SCALARMUL:
23904 case NEON_SCALARMULL:
23905 case NEON_SCALARMULH:
23906 case NEON_SCALARMAC:
23907 case NEON_SELECT:
23908 case NEON_VTBL:
23909 case NEON_VTBX:
23911 int k;
23912 tree return_type = void_type_node, args = void_list_node;
23914 /* Build a function type directly from the insn_data for
23915 this builtin. The build_function_type() function takes
23916 care of removing duplicates for us. */
23917 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23919 tree eltype;
23921 if (is_load && k == 1)
23923 /* Neon load patterns always have the memory
23924 operand in the operand 1 position. */
23925 gcc_assert (insn_data[d->code].operand[k].predicate
23926 == neon_struct_operand);
23928 switch (d->mode)
23930 case T_V8QI:
23931 case T_V16QI:
23932 eltype = const_intQI_pointer_node;
23933 break;
23935 case T_V4HI:
23936 case T_V8HI:
23937 eltype = const_intHI_pointer_node;
23938 break;
23940 case T_V2SI:
23941 case T_V4SI:
23942 eltype = const_intSI_pointer_node;
23943 break;
23945 case T_V2SF:
23946 case T_V4SF:
23947 eltype = const_float_pointer_node;
23948 break;
23950 case T_DI:
23951 case T_V2DI:
23952 eltype = const_intDI_pointer_node;
23953 break;
23955 default: gcc_unreachable ();
23958 else if (is_store && k == 0)
23960 /* Similarly, Neon store patterns use operand 0 as
23961 the memory location to store to. */
23962 gcc_assert (insn_data[d->code].operand[k].predicate
23963 == neon_struct_operand);
23965 switch (d->mode)
23967 case T_V8QI:
23968 case T_V16QI:
23969 eltype = intQI_pointer_node;
23970 break;
23972 case T_V4HI:
23973 case T_V8HI:
23974 eltype = intHI_pointer_node;
23975 break;
23977 case T_V2SI:
23978 case T_V4SI:
23979 eltype = intSI_pointer_node;
23980 break;
23982 case T_V2SF:
23983 case T_V4SF:
23984 eltype = float_pointer_node;
23985 break;
23987 case T_DI:
23988 case T_V2DI:
23989 eltype = intDI_pointer_node;
23990 break;
23992 default: gcc_unreachable ();
23995 else
23997 switch (insn_data[d->code].operand[k].mode)
23999 case VOIDmode: eltype = void_type_node; break;
24000 /* Scalars. */
24001 case QImode: eltype = neon_intQI_type_node; break;
24002 case HImode: eltype = neon_intHI_type_node; break;
24003 case SImode: eltype = neon_intSI_type_node; break;
24004 case SFmode: eltype = neon_float_type_node; break;
24005 case DImode: eltype = neon_intDI_type_node; break;
24006 case TImode: eltype = intTI_type_node; break;
24007 case EImode: eltype = intEI_type_node; break;
24008 case OImode: eltype = intOI_type_node; break;
24009 case CImode: eltype = intCI_type_node; break;
24010 case XImode: eltype = intXI_type_node; break;
24011 /* 64-bit vectors. */
24012 case V8QImode: eltype = V8QI_type_node; break;
24013 case V4HImode: eltype = V4HI_type_node; break;
24014 case V2SImode: eltype = V2SI_type_node; break;
24015 case V2SFmode: eltype = V2SF_type_node; break;
24016 /* 128-bit vectors. */
24017 case V16QImode: eltype = V16QI_type_node; break;
24018 case V8HImode: eltype = V8HI_type_node; break;
24019 case V4SImode: eltype = V4SI_type_node; break;
24020 case V4SFmode: eltype = V4SF_type_node; break;
24021 case V2DImode: eltype = V2DI_type_node; break;
24022 default: gcc_unreachable ();
24026 if (k == 0 && !is_store)
24027 return_type = eltype;
24028 else
24029 args = tree_cons (NULL_TREE, eltype, args);
24032 ftype = build_function_type (return_type, args);
24034 break;
24036 case NEON_RESULTPAIR:
24038 switch (insn_data[d->code].operand[1].mode)
24040 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
24041 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
24042 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
24043 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
24044 case DImode: ftype = void_ftype_pdi_di_di; break;
24045 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
24046 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
24047 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
24048 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
24049 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
24050 default: gcc_unreachable ();
24053 break;
24055 case NEON_REINTERP:
24057 /* We iterate over NUM_DREG_TYPES doubleword types,
24058 then NUM_QREG_TYPES quadword types.
24059 V4HF is not a type used in reinterpret, so we translate
24060 d->mode to the correct index in reinterp_ftype_dreg. */
24061 bool qreg_p
24062 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24063 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24064 % NUM_QREG_TYPES;
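/* Worked example of the index translation above, using the mode order from
   modenames[] (T_V8QI = 0, T_V4HI = 1, T_V4HF = 2, T_V2SI = 3, ...,
   T_V16QI = 6, ..., T_TI = 11): a doubleword reinterpret with
   d->mode == T_V2SI skips the unused V4HF slot, giving rhs == 2 and hence
   dreg_types[2] (V2SI); a quadword reinterpret with d->mode == T_TI gives
   11 % NUM_QREG_TYPES == 5, i.e. qreg_types[5].  */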
24065 switch (insn_data[d->code].operand[0].mode)
24067 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24068 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24069 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24070 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24071 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24072 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24073 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24074 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24075 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24076 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24077 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24078 default: gcc_unreachable ();
24081 break;
24082 case NEON_FLOAT_WIDEN:
24084 tree eltype = NULL_TREE;
24085 tree return_type = NULL_TREE;
24087 switch (insn_data[d->code].operand[1].mode)
24089 case V4HFmode:
24090 eltype = V4HF_type_node;
24091 return_type = V4SF_type_node;
24092 break;
24093 default: gcc_unreachable ();
24095 ftype = build_function_type_list (return_type, eltype, NULL);
24096 break;
24098 case NEON_FLOAT_NARROW:
24100 tree eltype = NULL_TREE;
24101 tree return_type = NULL_TREE;
24103 switch (insn_data[d->code].operand[1].mode)
24105 case V4SFmode:
24106 eltype = V4SF_type_node;
24107 return_type = V4HF_type_node;
24108 break;
24109 default: gcc_unreachable ();
24111 ftype = build_function_type_list (return_type, eltype, NULL);
24112 break;
24114 default:
24115 gcc_unreachable ();
24118 gcc_assert (ftype != NULL);
24120 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24122 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24123 NULL_TREE);
24124 arm_builtin_decls[fcode] = decl;
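/* Example of the name construction above (assuming arm_neon_builtins.def
   contains an entry named "vadd" with mode T_V8QI): the sprintf yields
   "__builtin_neon_vaddv8qi", which is the name arm_neon.h is expected to
   call for the 64-bit variant of that operation.  */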
24128 #undef NUM_DREG_TYPES
24129 #undef NUM_QREG_TYPES
24131 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24132 do \
24134 if ((MASK) & insn_flags) \
24136 tree bdecl; \
24137 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24138 BUILT_IN_MD, NULL, NULL_TREE); \
24139 arm_builtin_decls[CODE] = bdecl; \
24142 while (0)
24144 struct builtin_description
24146 const unsigned int mask;
24147 const enum insn_code icode;
24148 const char * const name;
24149 const enum arm_builtins code;
24150 const enum rtx_code comparison;
24151 const unsigned int flag;
24154 static const struct builtin_description bdesc_2arg[] =
24156 #define IWMMXT_BUILTIN(code, string, builtin) \
24157 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24158 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24160 #define IWMMXT2_BUILTIN(code, string, builtin) \
24161 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24162 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
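/* For instance, the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to the initializer

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the insn pattern to its builtin code and user-visible name.  */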
24164 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24165 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24166 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24167 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24168 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24169 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24170 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24171 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24172 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24173 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24174 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24175 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24176 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24177 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24178 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24179 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24180 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24181 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24182 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24183 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24184 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24185 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24186 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24187 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24188 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24189 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24190 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24191 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24192 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24193 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24194 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24195 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24196 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24197 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24198 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24199 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24200 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24201 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24202 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24203 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24204 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24205 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24206 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24207 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24208 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24209 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24210 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24211 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24212 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24213 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24214 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24215 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24216 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24217 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24218 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24219 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24220 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24221 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24222 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24223 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24224 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24225 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24226 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24227 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24228 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24229 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24230 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24231 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24232 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24233 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24234 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24235 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24236 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24237 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24238 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24239 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24240 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24241 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24243 #define IWMMXT_BUILTIN2(code, builtin) \
24244 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24246 #define IWMMXT2_BUILTIN2(code, builtin) \
24247 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24249 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24250 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24251 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24252 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24253 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24254 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24255 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24256 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24257 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24258 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24260 #define CRC32_BUILTIN(L, U) \
24261 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24262 UNKNOWN, 0},
24263 CRC32_BUILTIN (crc32b, CRC32B)
24264 CRC32_BUILTIN (crc32h, CRC32H)
24265 CRC32_BUILTIN (crc32w, CRC32W)
24266 CRC32_BUILTIN (crc32cb, CRC32CB)
24267 CRC32_BUILTIN (crc32ch, CRC32CH)
24268 CRC32_BUILTIN (crc32cw, CRC32CW)
24269 #undef CRC32_BUILTIN
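/* As an illustration, CRC32_BUILTIN (crc32b, CRC32B) above expands to

     {0, CODE_FOR_crc32b, "__builtin_arm_crc32b", ARM_BUILTIN_CRC32B,
      UNKNOWN, 0},

   The zero mask means these rows are skipped by the FL_IWMMXT-gated
   registration loop in arm_init_iwmmxt_builtins; the CRC32 builtins are
   declared separately by arm_init_crc32_builtins below.  */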
24272 #define CRYPTO_BUILTIN(L, U) \
24273 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24274 UNKNOWN, 0},
24275 #undef CRYPTO1
24276 #undef CRYPTO2
24277 #undef CRYPTO3
24278 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24279 #define CRYPTO1(L, U, R, A)
24280 #define CRYPTO3(L, U, R, A1, A2, A3)
24281 #include "crypto.def"
24282 #undef CRYPTO1
24283 #undef CRYPTO2
24284 #undef CRYPTO3
24288 static const struct builtin_description bdesc_1arg[] =
24290 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24291 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24292 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24293 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24294 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24295 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24296 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24297 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24298 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24299 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24300 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24301 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24302 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24303 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24304 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24305 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24306 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24307 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24308 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24309 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24310 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24311 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24312 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24313 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24315 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24316 #define CRYPTO2(L, U, R, A1, A2)
24317 #define CRYPTO3(L, U, R, A1, A2, A3)
24318 #include "crypto.def"
24319 #undef CRYPTO1
24320 #undef CRYPTO2
24321 #undef CRYPTO3
24324 static const struct builtin_description bdesc_3arg[] =
24326 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24327 #define CRYPTO1(L, U, R, A)
24328 #define CRYPTO2(L, U, R, A1, A2)
24329 #include "crypto.def"
24330 #undef CRYPTO1
24331 #undef CRYPTO2
24332 #undef CRYPTO3
24334 #undef CRYPTO_BUILTIN
24336 /* Set up all the iWMMXt builtins. This is not called if
24337 TARGET_IWMMXT is zero. */
24339 static void
24340 arm_init_iwmmxt_builtins (void)
24342 const struct builtin_description * d;
24343 size_t i;
24345 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24346 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24347 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24349 tree v8qi_ftype_v8qi_v8qi_int
24350 = build_function_type_list (V8QI_type_node,
24351 V8QI_type_node, V8QI_type_node,
24352 integer_type_node, NULL_TREE);
24353 tree v4hi_ftype_v4hi_int
24354 = build_function_type_list (V4HI_type_node,
24355 V4HI_type_node, integer_type_node, NULL_TREE);
24356 tree v2si_ftype_v2si_int
24357 = build_function_type_list (V2SI_type_node,
24358 V2SI_type_node, integer_type_node, NULL_TREE);
24359 tree v2si_ftype_di_di
24360 = build_function_type_list (V2SI_type_node,
24361 long_long_integer_type_node,
24362 long_long_integer_type_node,
24363 NULL_TREE);
24364 tree di_ftype_di_int
24365 = build_function_type_list (long_long_integer_type_node,
24366 long_long_integer_type_node,
24367 integer_type_node, NULL_TREE);
24368 tree di_ftype_di_int_int
24369 = build_function_type_list (long_long_integer_type_node,
24370 long_long_integer_type_node,
24371 integer_type_node,
24372 integer_type_node, NULL_TREE);
24373 tree int_ftype_v8qi
24374 = build_function_type_list (integer_type_node,
24375 V8QI_type_node, NULL_TREE);
24376 tree int_ftype_v4hi
24377 = build_function_type_list (integer_type_node,
24378 V4HI_type_node, NULL_TREE);
24379 tree int_ftype_v2si
24380 = build_function_type_list (integer_type_node,
24381 V2SI_type_node, NULL_TREE);
24382 tree int_ftype_v8qi_int
24383 = build_function_type_list (integer_type_node,
24384 V8QI_type_node, integer_type_node, NULL_TREE);
24385 tree int_ftype_v4hi_int
24386 = build_function_type_list (integer_type_node,
24387 V4HI_type_node, integer_type_node, NULL_TREE);
24388 tree int_ftype_v2si_int
24389 = build_function_type_list (integer_type_node,
24390 V2SI_type_node, integer_type_node, NULL_TREE);
24391 tree v8qi_ftype_v8qi_int_int
24392 = build_function_type_list (V8QI_type_node,
24393 V8QI_type_node, integer_type_node,
24394 integer_type_node, NULL_TREE);
24395 tree v4hi_ftype_v4hi_int_int
24396 = build_function_type_list (V4HI_type_node,
24397 V4HI_type_node, integer_type_node,
24398 integer_type_node, NULL_TREE);
24399 tree v2si_ftype_v2si_int_int
24400 = build_function_type_list (V2SI_type_node,
24401 V2SI_type_node, integer_type_node,
24402 integer_type_node, NULL_TREE);
24403 /* Miscellaneous. */
24404 tree v8qi_ftype_v4hi_v4hi
24405 = build_function_type_list (V8QI_type_node,
24406 V4HI_type_node, V4HI_type_node, NULL_TREE);
24407 tree v4hi_ftype_v2si_v2si
24408 = build_function_type_list (V4HI_type_node,
24409 V2SI_type_node, V2SI_type_node, NULL_TREE);
24410 tree v8qi_ftype_v4hi_v8qi
24411 = build_function_type_list (V8QI_type_node,
24412 V4HI_type_node, V8QI_type_node, NULL_TREE);
24413 tree v2si_ftype_v4hi_v4hi
24414 = build_function_type_list (V2SI_type_node,
24415 V4HI_type_node, V4HI_type_node, NULL_TREE);
24416 tree v2si_ftype_v8qi_v8qi
24417 = build_function_type_list (V2SI_type_node,
24418 V8QI_type_node, V8QI_type_node, NULL_TREE);
24419 tree v4hi_ftype_v4hi_di
24420 = build_function_type_list (V4HI_type_node,
24421 V4HI_type_node, long_long_integer_type_node,
24422 NULL_TREE);
24423 tree v2si_ftype_v2si_di
24424 = build_function_type_list (V2SI_type_node,
24425 V2SI_type_node, long_long_integer_type_node,
24426 NULL_TREE);
24427 tree di_ftype_void
24428 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24429 tree int_ftype_void
24430 = build_function_type_list (integer_type_node, NULL_TREE);
24431 tree di_ftype_v8qi
24432 = build_function_type_list (long_long_integer_type_node,
24433 V8QI_type_node, NULL_TREE);
24434 tree di_ftype_v4hi
24435 = build_function_type_list (long_long_integer_type_node,
24436 V4HI_type_node, NULL_TREE);
24437 tree di_ftype_v2si
24438 = build_function_type_list (long_long_integer_type_node,
24439 V2SI_type_node, NULL_TREE);
24440 tree v2si_ftype_v4hi
24441 = build_function_type_list (V2SI_type_node,
24442 V4HI_type_node, NULL_TREE);
24443 tree v4hi_ftype_v8qi
24444 = build_function_type_list (V4HI_type_node,
24445 V8QI_type_node, NULL_TREE);
24446 tree v8qi_ftype_v8qi
24447 = build_function_type_list (V8QI_type_node,
24448 V8QI_type_node, NULL_TREE);
24449 tree v4hi_ftype_v4hi
24450 = build_function_type_list (V4HI_type_node,
24451 V4HI_type_node, NULL_TREE);
24452 tree v2si_ftype_v2si
24453 = build_function_type_list (V2SI_type_node,
24454 V2SI_type_node, NULL_TREE);
24456 tree di_ftype_di_v4hi_v4hi
24457 = build_function_type_list (long_long_unsigned_type_node,
24458 long_long_unsigned_type_node,
24459 V4HI_type_node, V4HI_type_node,
24460 NULL_TREE);
24462 tree di_ftype_v4hi_v4hi
24463 = build_function_type_list (long_long_unsigned_type_node,
24464 V4HI_type_node, V4HI_type_node,
24465 NULL_TREE);
24467 tree v2si_ftype_v2si_v4hi_v4hi
24468 = build_function_type_list (V2SI_type_node,
24469 V2SI_type_node, V4HI_type_node,
24470 V4HI_type_node, NULL_TREE);
24472 tree v2si_ftype_v2si_v8qi_v8qi
24473 = build_function_type_list (V2SI_type_node,
24474 V2SI_type_node, V8QI_type_node,
24475 V8QI_type_node, NULL_TREE);
24477 tree di_ftype_di_v2si_v2si
24478 = build_function_type_list (long_long_unsigned_type_node,
24479 long_long_unsigned_type_node,
24480 V2SI_type_node, V2SI_type_node,
24481 NULL_TREE);
24483 tree di_ftype_di_di_int
24484 = build_function_type_list (long_long_unsigned_type_node,
24485 long_long_unsigned_type_node,
24486 long_long_unsigned_type_node,
24487 integer_type_node, NULL_TREE);
24489 tree void_ftype_int
24490 = build_function_type_list (void_type_node,
24491 integer_type_node, NULL_TREE);
24493 tree v8qi_ftype_char
24494 = build_function_type_list (V8QI_type_node,
24495 signed_char_type_node, NULL_TREE);
24497 tree v4hi_ftype_short
24498 = build_function_type_list (V4HI_type_node,
24499 short_integer_type_node, NULL_TREE);
24501 tree v2si_ftype_int
24502 = build_function_type_list (V2SI_type_node,
24503 integer_type_node, NULL_TREE);
24505 /* Normal vector binops. */
24506 tree v8qi_ftype_v8qi_v8qi
24507 = build_function_type_list (V8QI_type_node,
24508 V8QI_type_node, V8QI_type_node, NULL_TREE);
24509 tree v4hi_ftype_v4hi_v4hi
24510 = build_function_type_list (V4HI_type_node,
24511 V4HI_type_node, V4HI_type_node, NULL_TREE);
24512 tree v2si_ftype_v2si_v2si
24513 = build_function_type_list (V2SI_type_node,
24514 V2SI_type_node, V2SI_type_node, NULL_TREE);
24515 tree di_ftype_di_di
24516 = build_function_type_list (long_long_unsigned_type_node,
24517 long_long_unsigned_type_node,
24518 long_long_unsigned_type_node,
24519 NULL_TREE);
24521 /* Add all builtins that are more or less simple operations on two
24522 operands. */
24523 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24525 /* Use one of the operands; the target can have a different mode for
24526 mask-generating compares. */
24527 enum machine_mode mode;
24528 tree type;
24530 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24531 continue;
24533 mode = insn_data[d->icode].operand[1].mode;
24535 switch (mode)
24537 case V8QImode:
24538 type = v8qi_ftype_v8qi_v8qi;
24539 break;
24540 case V4HImode:
24541 type = v4hi_ftype_v4hi_v4hi;
24542 break;
24543 case V2SImode:
24544 type = v2si_ftype_v2si_v2si;
24545 break;
24546 case DImode:
24547 type = di_ftype_di_di;
24548 break;
24550 default:
24551 gcc_unreachable ();
24554 def_mbuiltin (d->mask, d->name, type, d->code);
24557 /* Add the remaining MMX insns with somewhat more complicated types. */
24558 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24560 ARM_BUILTIN_ ## CODE)
24562 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24563 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24564 ARM_BUILTIN_ ## CODE)
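/* For example, the call iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO)
   just below expands to

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   so the builtin is only created when FL_IWMMXT is set in insn_flags.  */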
24566 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24567 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24568 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24569 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24570 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24571 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24572 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24573 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24574 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24576 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24577 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24578 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24579 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24580 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24581 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24583 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24584 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24585 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24586 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24587 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24588 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24590 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24591 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24592 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24593 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24594 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24595 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24597 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24598 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24599 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24600 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24601 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24602 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24604 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24606 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24607 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24608 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24609 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24610 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24611 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24612 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24613 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24614 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24615 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24617 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24618 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24619 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24620 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24621 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24622 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24623 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24624 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24625 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24627 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24628 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24629 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24631 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24632 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24633 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24635 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24636 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24638 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24639 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24640 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24641 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24642 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24643 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24645 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24646 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24647 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24648 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24649 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24650 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24651 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24652 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24653 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24654 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24655 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24656 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24658 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24659 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24660 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24661 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24663 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24664 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24665 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24666 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24667 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24668 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24669 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24671 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24672 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24673 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24675 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24676 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24677 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24678 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24680 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24681 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24682 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24683 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24685 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24686 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24687 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24688 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24690 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24691 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24692 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24693 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24695 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24696 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24697 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24698 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24700 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24701 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24702 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24703 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24705 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24707 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24708 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24709 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24711 #undef iwmmx_mbuiltin
24712 #undef iwmmx2_mbuiltin
24715 static void
24716 arm_init_fp16_builtins (void)
24718 tree fp16_type = make_node (REAL_TYPE);
24719 TYPE_PRECISION (fp16_type) = 16;
24720 layout_type (fp16_type);
24721 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
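/* Illustrative usage (a sketch; it assumes -mfp16-format=ieee or
   -mfp16-format=alternative so that arm_fp16_format is non-zero and this
   function is reached):

     __fp16 h = 1.0f;
     float f = h;

   __fp16 is a storage-only type; arithmetic is performed after promotion
   to float (see arm_promoted_type below).  */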
24724 static void
24725 arm_init_crc32_builtins ()
24727 tree si_ftype_si_qi
24728 = build_function_type_list (unsigned_intSI_type_node,
24729 unsigned_intSI_type_node,
24730 unsigned_intQI_type_node, NULL_TREE);
24731 tree si_ftype_si_hi
24732 = build_function_type_list (unsigned_intSI_type_node,
24733 unsigned_intSI_type_node,
24734 unsigned_intHI_type_node, NULL_TREE);
24735 tree si_ftype_si_si
24736 = build_function_type_list (unsigned_intSI_type_node,
24737 unsigned_intSI_type_node,
24738 unsigned_intSI_type_node, NULL_TREE);
24740 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24741 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24742 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24743 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24744 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24745 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24746 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24747 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24748 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24749 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24750 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24751 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24752 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24753 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24754 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24755 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24756 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24757 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
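/* Illustrative use of the declarations above (an assumed example; it relies
   on a target where TARGET_CRC32 holds, e.g. -march=armv8-a+crc):

     unsigned int c = __builtin_arm_crc32b (crc, byte);

   arm_acle.h is expected to wrap these builtins in the ACLE __crc32*
   intrinsics.  */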
24760 static void
24761 arm_init_builtins (void)
24763 if (TARGET_REALLY_IWMMXT)
24764 arm_init_iwmmxt_builtins ();
24766 if (TARGET_NEON)
24767 arm_init_neon_builtins ();
24769 if (arm_fp16_format)
24770 arm_init_fp16_builtins ();
24772 if (TARGET_CRC32)
24773 arm_init_crc32_builtins ();
24776 /* Return the ARM builtin for CODE. */
24778 static tree
24779 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24781 if (code >= ARM_BUILTIN_MAX)
24782 return error_mark_node;
24784 return arm_builtin_decls[code];
24787 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24789 static const char *
24790 arm_invalid_parameter_type (const_tree t)
24792 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24793 return N_("function parameters cannot have __fp16 type");
24794 return NULL;
24797 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24799 static const char *
24800 arm_invalid_return_type (const_tree t)
24802 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24803 return N_("functions cannot return __fp16 type");
24804 return NULL;
24807 /* Implement TARGET_PROMOTED_TYPE. */
24809 static tree
24810 arm_promoted_type (const_tree t)
24812 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24813 return float_type_node;
24814 return NULL_TREE;
24817 /* Implement TARGET_CONVERT_TO_TYPE.
24818 Specifically, this hook implements the peculiarity of the ARM
24819 half-precision floating-point C semantics that requires conversions
24820 between __fp16 and double to go through an intermediate conversion to float. */
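/* For instance, with this hook in place an assignment such as
   "double d = h;" where h has type __fp16 is expanded as if written
   (double) (float) h, and likewise for the double-to-__fp16 direction.  */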
24822 static tree
24823 arm_convert_to_type (tree type, tree expr)
24825 tree fromtype = TREE_TYPE (expr);
24826 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24827 return NULL_TREE;
24828 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24829 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24830 return convert (type, convert (float_type_node, expr));
24831 return NULL_TREE;
24834 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24835 This simply adds HFmode as a supported mode; even though we don't
24836 implement arithmetic on this type directly, it's supported by
24837 optabs conversions, much the way the double-word arithmetic is
24838 special-cased in the default hook. */
24840 static bool
24841 arm_scalar_mode_supported_p (enum machine_mode mode)
24843 if (mode == HFmode)
24844 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24845 else if (ALL_FIXED_POINT_MODE_P (mode))
24846 return true;
24847 else
24848 return default_scalar_mode_supported_p (mode);
24851 /* Errors in the source file can cause expand_expr to return const0_rtx
24852 where we expect a vector. To avoid crashing, use one of the vector
24853 clear instructions. */
24855 static rtx
24856 safe_vector_operand (rtx x, enum machine_mode mode)
24858 if (x != const0_rtx)
24859 return x;
24860 x = gen_reg_rtx (mode);
24862 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24863 : gen_rtx_SUBREG (DImode, x, 0)));
24864 return x;
24867 /* Function to expand ternary builtins. */
24868 static rtx
24869 arm_expand_ternop_builtin (enum insn_code icode,
24870 tree exp, rtx target)
24872 rtx pat;
24873 tree arg0 = CALL_EXPR_ARG (exp, 0);
24874 tree arg1 = CALL_EXPR_ARG (exp, 1);
24875 tree arg2 = CALL_EXPR_ARG (exp, 2);
24877 rtx op0 = expand_normal (arg0);
24878 rtx op1 = expand_normal (arg1);
24879 rtx op2 = expand_normal (arg2);
24880 rtx op3 = NULL_RTX;
24882 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24883 lane operand depending on endianness. */
24884 bool builtin_sha1cpm_p = false;
24886 if (insn_data[icode].n_operands == 5)
24888 gcc_assert (icode == CODE_FOR_crypto_sha1c
24889 || icode == CODE_FOR_crypto_sha1p
24890 || icode == CODE_FOR_crypto_sha1m);
24891 builtin_sha1cpm_p = true;
24893 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24894 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24895 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24896 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24899 if (VECTOR_MODE_P (mode0))
24900 op0 = safe_vector_operand (op0, mode0);
24901 if (VECTOR_MODE_P (mode1))
24902 op1 = safe_vector_operand (op1, mode1);
24903 if (VECTOR_MODE_P (mode2))
24904 op2 = safe_vector_operand (op2, mode2);
24906 if (! target
24907 || GET_MODE (target) != tmode
24908 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24909 target = gen_reg_rtx (tmode);
24911 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24912 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24913 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24915 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24916 op0 = copy_to_mode_reg (mode0, op0);
24917 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24918 op1 = copy_to_mode_reg (mode1, op1);
24919 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24920 op2 = copy_to_mode_reg (mode2, op2);
24921 if (builtin_sha1cpm_p)
24922 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24924 if (builtin_sha1cpm_p)
24925 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24926 else
24927 pat = GEN_FCN (icode) (target, op0, op1, op2);
24928 if (! pat)
24929 return 0;
24930 emit_insn (pat);
24931 return target;
24934 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24936 static rtx
24937 arm_expand_binop_builtin (enum insn_code icode,
24938 tree exp, rtx target)
24940 rtx pat;
24941 tree arg0 = CALL_EXPR_ARG (exp, 0);
24942 tree arg1 = CALL_EXPR_ARG (exp, 1);
24943 rtx op0 = expand_normal (arg0);
24944 rtx op1 = expand_normal (arg1);
24945 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24946 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24947 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24949 if (VECTOR_MODE_P (mode0))
24950 op0 = safe_vector_operand (op0, mode0);
24951 if (VECTOR_MODE_P (mode1))
24952 op1 = safe_vector_operand (op1, mode1);
24954 if (! target
24955 || GET_MODE (target) != tmode
24956 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24957 target = gen_reg_rtx (tmode);
24959 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24960 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24963 op0 = copy_to_mode_reg (mode0, op0);
24964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24965 op1 = copy_to_mode_reg (mode1, op1);
24967 pat = GEN_FCN (icode) (target, op0, op1);
24968 if (! pat)
24969 return 0;
24970 emit_insn (pat);
24971 return target;
24974 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24976 static rtx
24977 arm_expand_unop_builtin (enum insn_code icode,
24978 tree exp, rtx target, int do_load)
24980 rtx pat;
24981 tree arg0 = CALL_EXPR_ARG (exp, 0);
24982 rtx op0 = expand_normal (arg0);
24983 rtx op1 = NULL_RTX;
24984 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24985 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24986 bool builtin_sha1h_p = false;
24988 if (insn_data[icode].n_operands == 3)
24990 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24991 builtin_sha1h_p = true;
24994 if (! target
24995 || GET_MODE (target) != tmode
24996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24997 target = gen_reg_rtx (tmode);
24998 if (do_load)
24999 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25000 else
25002 if (VECTOR_MODE_P (mode0))
25003 op0 = safe_vector_operand (op0, mode0);
25005 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25006 op0 = copy_to_mode_reg (mode0, op0);
25008 if (builtin_sha1h_p)
25009 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25011 if (builtin_sha1h_p)
25012 pat = GEN_FCN (icode) (target, op0, op1);
25013 else
25014 pat = GEN_FCN (icode) (target, op0);
25015 if (! pat)
25016 return 0;
25017 emit_insn (pat);
25018 return target;
25021 typedef enum {
25022 NEON_ARG_COPY_TO_REG,
25023 NEON_ARG_CONSTANT,
25024 NEON_ARG_MEMORY,
25025 NEON_ARG_STOP
25026 } builtin_arg;
25028 #define NEON_MAX_BUILTIN_ARGS 5
25030 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25031 and return an expression for the accessed memory.
25033 The intrinsic function operates on a block of registers that has
25034 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25035 function references the memory at EXP of type TYPE and in mode
25036 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25037 available. */
25039 static tree
25040 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25041 enum machine_mode reg_mode,
25042 neon_builtin_type_mode type_mode)
25044 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25045 tree elem_type, upper_bound, array_type;
25047 /* Work out the size of the register block in bytes. */
25048 reg_size = GET_MODE_SIZE (reg_mode);
25050 /* Work out the size of each vector in bytes. */
25051 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25052 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25054 /* Work out how many vectors there are. */
25055 gcc_assert (reg_size % vector_size == 0);
25056 nvectors = reg_size / vector_size;
25058 /* Work out the type of each element. */
25059 gcc_assert (POINTER_TYPE_P (type));
25060 elem_type = TREE_TYPE (type);
25062 /* Work out how many elements are being loaded or stored.
25063 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25064 and memory elements; anything else implies a lane load or store. */
25065 if (mem_mode == reg_mode)
25066 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25067 else
25068 nelems = nvectors;
25070 /* Create a type that describes the full access. */
25071 upper_bound = build_int_cst (size_type_node, nelems - 1);
25072 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25074 /* Dereference EXP using that type. */
25075 return fold_build2 (MEM_REF, array_type, exp,
25076 build_int_cst (build_pointer_type (array_type), 0));
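/* Worked example of the sizing logic above: for a full load of a block of
   two V8QI vectors (16-byte reg_mode, mem_mode equal to reg_mode, int8
   element type) we get reg_size = 16, vector_size = 8, nvectors = 2 and
   nelems = 16, so the access is described as an array of 16 int8 elements;
   for a lane access (mem_mode != reg_mode) only nvectors elements are
   touched.  */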
25079 /* Expand a Neon builtin. */
25080 static rtx
25081 arm_expand_neon_args (rtx target, int icode, int have_retval,
25082 neon_builtin_type_mode type_mode,
25083 tree exp, int fcode, ...)
25085 va_list ap;
25086 rtx pat;
25087 tree arg[NEON_MAX_BUILTIN_ARGS];
25088 rtx op[NEON_MAX_BUILTIN_ARGS];
25089 tree arg_type;
25090 tree formals;
25091 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25092 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25093 enum machine_mode other_mode;
25094 int argc = 0;
25095 int opno;
25097 if (have_retval
25098 && (!target
25099 || GET_MODE (target) != tmode
25100 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25101 target = gen_reg_rtx (tmode);
25103 va_start (ap, fcode);
25105 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25107 for (;;)
25109 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25111 if (thisarg == NEON_ARG_STOP)
25112 break;
25113 else
25115 opno = argc + have_retval;
25116 mode[argc] = insn_data[icode].operand[opno].mode;
25117 arg[argc] = CALL_EXPR_ARG (exp, argc);
25118 arg_type = TREE_VALUE (formals);
25119 if (thisarg == NEON_ARG_MEMORY)
25121 other_mode = insn_data[icode].operand[1 - opno].mode;
25122 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25123 mode[argc], other_mode,
25124 type_mode);
25127 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25128 value is returned. */
25129 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25130 (thisarg == NEON_ARG_MEMORY
25131 ? EXPAND_MEMORY : EXPAND_NORMAL));
25133 switch (thisarg)
25135 case NEON_ARG_COPY_TO_REG:
25136 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25137 if (!(*insn_data[icode].operand[opno].predicate)
25138 (op[argc], mode[argc]))
25139 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25140 break;
25142 case NEON_ARG_CONSTANT:
25143 /* FIXME: This error message is somewhat unhelpful. */
25144 if (!(*insn_data[icode].operand[opno].predicate)
25145 (op[argc], mode[argc]))
25146 error ("argument must be a constant");
25147 break;
25149 case NEON_ARG_MEMORY:
25150 /* Check if expand failed. */
25151 if (op[argc] == const0_rtx)
25152 return 0;
25153 gcc_assert (MEM_P (op[argc]));
25154 PUT_MODE (op[argc], mode[argc]);
25155 /* ??? arm_neon.h uses the same built-in functions for signed
25156 and unsigned accesses, casting where necessary. This isn't
25157 alias safe. */
25158 set_mem_alias_set (op[argc], 0);
25159 if (!(*insn_data[icode].operand[opno].predicate)
25160 (op[argc], mode[argc]))
25161 op[argc] = (replace_equiv_address
25162 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25163 break;
25165 case NEON_ARG_STOP:
25166 gcc_unreachable ();
25169 argc++;
25170 formals = TREE_CHAIN (formals);
25174 va_end (ap);
25176 if (have_retval)
25177 switch (argc)
25179 case 1:
25180 pat = GEN_FCN (icode) (target, op[0]);
25181 break;
25183 case 2:
25184 pat = GEN_FCN (icode) (target, op[0], op[1]);
25185 break;
25187 case 3:
25188 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25189 break;
25191 case 4:
25192 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25193 break;
25195 case 5:
25196 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25197 break;
25199 default:
25200 gcc_unreachable ();
25202 else
25203 switch (argc)
25205 case 1:
25206 pat = GEN_FCN (icode) (op[0]);
25207 break;
25209 case 2:
25210 pat = GEN_FCN (icode) (op[0], op[1]);
25211 break;
25213 case 3:
25214 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25215 break;
25217 case 4:
25218 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25219 break;
25221 case 5:
25222 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25223 break;
25225 default:
25226 gcc_unreachable ();
25229 if (!pat)
25230 return 0;
25232 emit_insn (pat);
25234 return target;
25237 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25238 constants defined per-instruction or per instruction-variant. Instead, the
25239 required info is looked up in the table neon_builtin_data. */
25240 static rtx
25241 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25243 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25244 neon_itype itype = d->itype;
25245 enum insn_code icode = d->code;
25246 neon_builtin_type_mode type_mode = d->mode;
25248 switch (itype)
25250 case NEON_UNOP:
25251 case NEON_CONVERT:
25252 case NEON_DUPLANE:
25253 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25254 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25256 case NEON_BINOP:
25257 case NEON_SETLANE:
25258 case NEON_SCALARMUL:
25259 case NEON_SCALARMULL:
25260 case NEON_SCALARMULH:
25261 case NEON_SHIFTINSERT:
25262 case NEON_LOGICBINOP:
25263 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25264 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25265 NEON_ARG_STOP);
25267 case NEON_TERNOP:
25268 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25269 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25270 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25272 case NEON_GETLANE:
25273 case NEON_FIXCONV:
25274 case NEON_SHIFTIMM:
25275 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25276 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25277 NEON_ARG_STOP);
25279 case NEON_CREATE:
25280 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25281 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25283 case NEON_DUP:
25284 case NEON_RINT:
25285 case NEON_SPLIT:
25286 case NEON_FLOAT_WIDEN:
25287 case NEON_FLOAT_NARROW:
25288 case NEON_REINTERP:
25289 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25290 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25292 case NEON_COMBINE:
25293 case NEON_VTBL:
25294 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25295 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25297 case NEON_RESULTPAIR:
25298 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25299 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25300 NEON_ARG_STOP);
25302 case NEON_LANEMUL:
25303 case NEON_LANEMULL:
25304 case NEON_LANEMULH:
25305 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25306 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25307 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25309 case NEON_LANEMAC:
25310 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25311 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25312 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25314 case NEON_SHIFTACC:
25315 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25316 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25317 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25319 case NEON_SCALARMAC:
25320 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25321 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25322 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25324 case NEON_SELECT:
25325 case NEON_VTBX:
25326 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25327 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25328 NEON_ARG_STOP);
25330 case NEON_LOAD1:
25331 case NEON_LOADSTRUCT:
25332 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25333 NEON_ARG_MEMORY, NEON_ARG_STOP);
25335 case NEON_LOAD1LANE:
25336 case NEON_LOADSTRUCTLANE:
25337 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25338 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25339 NEON_ARG_STOP);
25341 case NEON_STORE1:
25342 case NEON_STORESTRUCT:
25343 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25344 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25346 case NEON_STORE1LANE:
25347 case NEON_STORESTRUCTLANE:
25348 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25349 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25350 NEON_ARG_STOP);
25353 gcc_unreachable ();
25356 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25357 void
25358 neon_reinterpret (rtx dest, rtx src)
25360 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25363 /* Emit code to place a Neon pair result in memory locations (with equal
25364 registers). */
25365 void
25366 neon_emit_pair_result_insn (enum machine_mode mode,
25367 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25368 rtx op1, rtx op2)
25370 rtx mem = gen_rtx_MEM (mode, destaddr);
25371 rtx tmp1 = gen_reg_rtx (mode);
25372 rtx tmp2 = gen_reg_rtx (mode);
25374 emit_insn (intfn (tmp1, op1, op2, tmp2));
25376 emit_move_insn (mem, tmp1);
25377 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25378 emit_move_insn (mem, tmp2);
25381 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25382 not to early-clobber SRC registers in the process.
25384 We assume that the operands described by SRC and DEST represent a
25385 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25386 number of components into which the copy has been decomposed. */
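/* For example, copying the pair {d1,d2} into {d0,d1} is emitted
   low-to-high (d0 <- d1, then d1 <- d2), while copying {d0,d1} into
   {d1,d2} is emitted high-to-low (d2 <- d1, then d1 <- d0), so that no
   source register is overwritten before it has been read. */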
25387 void
25388 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25390 unsigned int i;
25392 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25393 || REGNO (operands[0]) < REGNO (operands[1]))
25395 for (i = 0; i < count; i++)
25397 operands[2 * i] = dest[i];
25398 operands[2 * i + 1] = src[i];
25401 else
25403 for (i = 0; i < count; i++)
25405 operands[2 * i] = dest[count - i - 1];
25406 operands[2 * i + 1] = src[count - i - 1];
25411 /* Split operands into moves from op[1] + op[2] into op[0]. */
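/* Three cases are handled: if the source D registers already occupy the
   destination Q register's halves, nothing is moved (only a deleted-insn
   note is emitted); if they occupy the halves in reverse order, a single
   two-set PARALLEL is emitted so that it can match a VSWP pattern;
   otherwise the two half moves are ordered so that neither move clobbers
   a source that is still needed. */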
25413 void
25414 neon_split_vcombine (rtx operands[3])
25416 unsigned int dest = REGNO (operands[0]);
25417 unsigned int src1 = REGNO (operands[1]);
25418 unsigned int src2 = REGNO (operands[2]);
25419 enum machine_mode halfmode = GET_MODE (operands[1]);
25420 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25421 rtx destlo, desthi;
25423 if (src1 == dest && src2 == dest + halfregs)
25425 /* No-op move. Can't split to nothing; emit something. */
25426 emit_note (NOTE_INSN_DELETED);
25427 return;
25430 /* Preserve register attributes for variable tracking. */
25431 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25432 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25433 GET_MODE_SIZE (halfmode));
25435 /* Special case of reversed high/low parts. Use VSWP. */
25436 if (src2 == dest && src1 == dest + halfregs)
25438 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25439 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25440 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25441 return;
25444 if (!reg_overlap_mentioned_p (operands[2], destlo))
25446 /* Try to avoid unnecessary moves if part of the result
25447 is in the right place already. */
25448 if (src1 != dest)
25449 emit_move_insn (destlo, operands[1]);
25450 if (src2 != dest + halfregs)
25451 emit_move_insn (desthi, operands[2]);
25453 else
25455 if (src2 != dest + halfregs)
25456 emit_move_insn (desthi, operands[2]);
25457 if (src1 != dest)
25458 emit_move_insn (destlo, operands[1]);
25462 /* Expand an expression EXP that calls a built-in function,
25463 with result going to TARGET if that's convenient
25464 (and in mode MODE if that's convenient).
25465 SUBTARGET may be used as the target for computing one of EXP's operands.
25466 IGNORE is nonzero if the value is to be ignored. */
25468 static rtx
25469 arm_expand_builtin (tree exp,
25470 rtx target,
25471 rtx subtarget ATTRIBUTE_UNUSED,
25472 enum machine_mode mode ATTRIBUTE_UNUSED,
25473 int ignore ATTRIBUTE_UNUSED)
25475 const struct builtin_description * d;
25476 enum insn_code icode;
25477 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25478 tree arg0;
25479 tree arg1;
25480 tree arg2;
25481 rtx op0;
25482 rtx op1;
25483 rtx op2;
25484 rtx pat;
25485 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25486 size_t i;
25487 enum machine_mode tmode;
25488 enum machine_mode mode0;
25489 enum machine_mode mode1;
25490 enum machine_mode mode2;
25491 int opint;
25492 int selector;
25493 int mask;
25494 int imm;
25496 if (fcode >= ARM_BUILTIN_NEON_BASE)
25497 return arm_expand_neon_builtin (fcode, exp, target);
25499 switch (fcode)
25501 case ARM_BUILTIN_TEXTRMSB:
25502 case ARM_BUILTIN_TEXTRMUB:
25503 case ARM_BUILTIN_TEXTRMSH:
25504 case ARM_BUILTIN_TEXTRMUH:
25505 case ARM_BUILTIN_TEXTRMSW:
25506 case ARM_BUILTIN_TEXTRMUW:
25507 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25508 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25509 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25510 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25511 : CODE_FOR_iwmmxt_textrmw);
25513 arg0 = CALL_EXPR_ARG (exp, 0);
25514 arg1 = CALL_EXPR_ARG (exp, 1);
25515 op0 = expand_normal (arg0);
25516 op1 = expand_normal (arg1);
25517 tmode = insn_data[icode].operand[0].mode;
25518 mode0 = insn_data[icode].operand[1].mode;
25519 mode1 = insn_data[icode].operand[2].mode;
25521 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25522 op0 = copy_to_mode_reg (mode0, op0);
25523 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25525 /* @@@ better error message */
25526 error ("selector must be an immediate");
25527 return gen_reg_rtx (tmode);
25530 opint = INTVAL (op1);
25531 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25533 if (opint > 7 || opint < 0)
25534 error ("the range of selector should be in 0 to 7");
25536 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25538 if (opint > 3 || opint < 0)
25539 error ("the range of selector should be in 0 to 3");
25541 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25543 if (opint > 1 || opint < 0)
25544 error ("the range of selector should be in 0 to 1");
25547 if (target == 0
25548 || GET_MODE (target) != tmode
25549 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25550 target = gen_reg_rtx (tmode);
25551 pat = GEN_FCN (icode) (target, op0, op1);
25552 if (! pat)
25553 return 0;
25554 emit_insn (pat);
25555 return target;
25557 case ARM_BUILTIN_WALIGNI:
25558 /* If op2 is an immediate, call waligni, else call walignr. */
25559 arg0 = CALL_EXPR_ARG (exp, 0);
25560 arg1 = CALL_EXPR_ARG (exp, 1);
25561 arg2 = CALL_EXPR_ARG (exp, 2);
25562 op0 = expand_normal (arg0);
25563 op1 = expand_normal (arg1);
25564 op2 = expand_normal (arg2);
25565 if (CONST_INT_P (op2))
25567 icode = CODE_FOR_iwmmxt_waligni;
25568 tmode = insn_data[icode].operand[0].mode;
25569 mode0 = insn_data[icode].operand[1].mode;
25570 mode1 = insn_data[icode].operand[2].mode;
25571 mode2 = insn_data[icode].operand[3].mode;
25572 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25573 op0 = copy_to_mode_reg (mode0, op0);
25574 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25575 op1 = copy_to_mode_reg (mode1, op1);
25576 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25577 selector = INTVAL (op2);
25578 if (selector > 7 || selector < 0)
25579 error ("the range of selector should be in 0 to 7");
25581 else
25583 icode = CODE_FOR_iwmmxt_walignr;
25584 tmode = insn_data[icode].operand[0].mode;
25585 mode0 = insn_data[icode].operand[1].mode;
25586 mode1 = insn_data[icode].operand[2].mode;
25587 mode2 = insn_data[icode].operand[3].mode;
25588 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25589 op0 = copy_to_mode_reg (mode0, op0);
25590 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25591 op1 = copy_to_mode_reg (mode1, op1);
25592 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25593 op2 = copy_to_mode_reg (mode2, op2);
25595 if (target == 0
25596 || GET_MODE (target) != tmode
25597 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25598 target = gen_reg_rtx (tmode);
25599 pat = GEN_FCN (icode) (target, op0, op1, op2);
25600 if (!pat)
25601 return 0;
25602 emit_insn (pat);
25603 return target;
25605 case ARM_BUILTIN_TINSRB:
25606 case ARM_BUILTIN_TINSRH:
25607 case ARM_BUILTIN_TINSRW:
25608 case ARM_BUILTIN_WMERGE:
25609 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25610 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25611 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25612 : CODE_FOR_iwmmxt_tinsrw);
25613 arg0 = CALL_EXPR_ARG (exp, 0);
25614 arg1 = CALL_EXPR_ARG (exp, 1);
25615 arg2 = CALL_EXPR_ARG (exp, 2);
25616 op0 = expand_normal (arg0);
25617 op1 = expand_normal (arg1);
25618 op2 = expand_normal (arg2);
25619 tmode = insn_data[icode].operand[0].mode;
25620 mode0 = insn_data[icode].operand[1].mode;
25621 mode1 = insn_data[icode].operand[2].mode;
25622 mode2 = insn_data[icode].operand[3].mode;
25624 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25625 op0 = copy_to_mode_reg (mode0, op0);
25626 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25627 op1 = copy_to_mode_reg (mode1, op1);
25628 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25630 error ("selector must be an immediate");
25631 return const0_rtx;
25633 if (icode == CODE_FOR_iwmmxt_wmerge)
25635 selector = INTVAL (op2);
25636 if (selector > 7 || selector < 0)
25637 error ("the range of selector should be in 0 to 7");
25639 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25640 || (icode == CODE_FOR_iwmmxt_tinsrh)
25641 || (icode == CODE_FOR_iwmmxt_tinsrw))
25643 mask = 0x01;
25644 selector = INTVAL (op2);
25645 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25646 error ("the range of selector should be in 0 to 7");
25647 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25648 error ("the range of selector should be in 0 to 3");
25649 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25650 error ("the range of selector should be in 0 to 1");
25651 mask <<= selector;
25652 op2 = GEN_INT (mask);
25654 if (target == 0
25655 || GET_MODE (target) != tmode
25656 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25657 target = gen_reg_rtx (tmode);
25658 pat = GEN_FCN (icode) (target, op0, op1, op2);
25659 if (! pat)
25660 return 0;
25661 emit_insn (pat);
25662 return target;
25664 case ARM_BUILTIN_SETWCGR0:
25665 case ARM_BUILTIN_SETWCGR1:
25666 case ARM_BUILTIN_SETWCGR2:
25667 case ARM_BUILTIN_SETWCGR3:
25668 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25669 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25670 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25671 : CODE_FOR_iwmmxt_setwcgr3);
25672 arg0 = CALL_EXPR_ARG (exp, 0);
25673 op0 = expand_normal (arg0);
25674 mode0 = insn_data[icode].operand[0].mode;
25675 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25676 op0 = copy_to_mode_reg (mode0, op0);
25677 pat = GEN_FCN (icode) (op0);
25678 if (!pat)
25679 return 0;
25680 emit_insn (pat);
25681 return 0;
25683 case ARM_BUILTIN_GETWCGR0:
25684 case ARM_BUILTIN_GETWCGR1:
25685 case ARM_BUILTIN_GETWCGR2:
25686 case ARM_BUILTIN_GETWCGR3:
25687 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25688 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25689 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25690 : CODE_FOR_iwmmxt_getwcgr3);
25691 tmode = insn_data[icode].operand[0].mode;
25692 if (target == 0
25693 || GET_MODE (target) != tmode
25694 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25695 target = gen_reg_rtx (tmode);
25696 pat = GEN_FCN (icode) (target);
25697 if (!pat)
25698 return 0;
25699 emit_insn (pat);
25700 return target;
25702 case ARM_BUILTIN_WSHUFH:
25703 icode = CODE_FOR_iwmmxt_wshufh;
25704 arg0 = CALL_EXPR_ARG (exp, 0);
25705 arg1 = CALL_EXPR_ARG (exp, 1);
25706 op0 = expand_normal (arg0);
25707 op1 = expand_normal (arg1);
25708 tmode = insn_data[icode].operand[0].mode;
25709 mode1 = insn_data[icode].operand[1].mode;
25710 mode2 = insn_data[icode].operand[2].mode;
25712 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25713 op0 = copy_to_mode_reg (mode1, op0);
25714 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25716 error ("mask must be an immediate");
25717 return const0_rtx;
25719 selector = INTVAL (op1);
25720 if (selector < 0 || selector > 255)
25721 error ("the range of mask should be in 0 to 255");
25722 if (target == 0
25723 || GET_MODE (target) != tmode
25724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25725 target = gen_reg_rtx (tmode);
25726 pat = GEN_FCN (icode) (target, op0, op1);
25727 if (! pat)
25728 return 0;
25729 emit_insn (pat);
25730 return target;
25732 case ARM_BUILTIN_WMADDS:
25733 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25734 case ARM_BUILTIN_WMADDSX:
25735 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25736 case ARM_BUILTIN_WMADDSN:
25737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25738 case ARM_BUILTIN_WMADDU:
25739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25740 case ARM_BUILTIN_WMADDUX:
25741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25742 case ARM_BUILTIN_WMADDUN:
25743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25744 case ARM_BUILTIN_WSADBZ:
25745 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25746 case ARM_BUILTIN_WSADHZ:
25747 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25749 /* Several three-argument builtins. */
25750 case ARM_BUILTIN_WMACS:
25751 case ARM_BUILTIN_WMACU:
25752 case ARM_BUILTIN_TMIA:
25753 case ARM_BUILTIN_TMIAPH:
25754 case ARM_BUILTIN_TMIATT:
25755 case ARM_BUILTIN_TMIATB:
25756 case ARM_BUILTIN_TMIABT:
25757 case ARM_BUILTIN_TMIABB:
25758 case ARM_BUILTIN_WQMIABB:
25759 case ARM_BUILTIN_WQMIABT:
25760 case ARM_BUILTIN_WQMIATB:
25761 case ARM_BUILTIN_WQMIATT:
25762 case ARM_BUILTIN_WQMIABBN:
25763 case ARM_BUILTIN_WQMIABTN:
25764 case ARM_BUILTIN_WQMIATBN:
25765 case ARM_BUILTIN_WQMIATTN:
25766 case ARM_BUILTIN_WMIABB:
25767 case ARM_BUILTIN_WMIABT:
25768 case ARM_BUILTIN_WMIATB:
25769 case ARM_BUILTIN_WMIATT:
25770 case ARM_BUILTIN_WMIABBN:
25771 case ARM_BUILTIN_WMIABTN:
25772 case ARM_BUILTIN_WMIATBN:
25773 case ARM_BUILTIN_WMIATTN:
25774 case ARM_BUILTIN_WMIAWBB:
25775 case ARM_BUILTIN_WMIAWBT:
25776 case ARM_BUILTIN_WMIAWTB:
25777 case ARM_BUILTIN_WMIAWTT:
25778 case ARM_BUILTIN_WMIAWBBN:
25779 case ARM_BUILTIN_WMIAWBTN:
25780 case ARM_BUILTIN_WMIAWTBN:
25781 case ARM_BUILTIN_WMIAWTTN:
25782 case ARM_BUILTIN_WSADB:
25783 case ARM_BUILTIN_WSADH:
25784 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25785 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25786 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25787 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25788 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25789 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25790 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25791 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25792 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25793 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25794 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25795 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25796 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25797 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25798 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25799 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25800 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25801 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25802 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25803 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25804 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25805 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25806 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25807 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25808 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25809 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25810 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25811 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25812 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25813 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25814 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25815 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25816 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25817 : CODE_FOR_iwmmxt_wsadh);
25818 arg0 = CALL_EXPR_ARG (exp, 0);
25819 arg1 = CALL_EXPR_ARG (exp, 1);
25820 arg2 = CALL_EXPR_ARG (exp, 2);
25821 op0 = expand_normal (arg0);
25822 op1 = expand_normal (arg1);
25823 op2 = expand_normal (arg2);
25824 tmode = insn_data[icode].operand[0].mode;
25825 mode0 = insn_data[icode].operand[1].mode;
25826 mode1 = insn_data[icode].operand[2].mode;
25827 mode2 = insn_data[icode].operand[3].mode;
25829 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25830 op0 = copy_to_mode_reg (mode0, op0);
25831 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25832 op1 = copy_to_mode_reg (mode1, op1);
25833 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25834 op2 = copy_to_mode_reg (mode2, op2);
25835 if (target == 0
25836 || GET_MODE (target) != tmode
25837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25838 target = gen_reg_rtx (tmode);
25839 pat = GEN_FCN (icode) (target, op0, op1, op2);
25840 if (! pat)
25841 return 0;
25842 emit_insn (pat);
25843 return target;
25845 case ARM_BUILTIN_WZERO:
25846 target = gen_reg_rtx (DImode);
25847 emit_insn (gen_iwmmxt_clrdi (target));
25848 return target;
25850 case ARM_BUILTIN_WSRLHI:
25851 case ARM_BUILTIN_WSRLWI:
25852 case ARM_BUILTIN_WSRLDI:
25853 case ARM_BUILTIN_WSLLHI:
25854 case ARM_BUILTIN_WSLLWI:
25855 case ARM_BUILTIN_WSLLDI:
25856 case ARM_BUILTIN_WSRAHI:
25857 case ARM_BUILTIN_WSRAWI:
25858 case ARM_BUILTIN_WSRADI:
25859 case ARM_BUILTIN_WRORHI:
25860 case ARM_BUILTIN_WRORWI:
25861 case ARM_BUILTIN_WRORDI:
25862 case ARM_BUILTIN_WSRLH:
25863 case ARM_BUILTIN_WSRLW:
25864 case ARM_BUILTIN_WSRLD:
25865 case ARM_BUILTIN_WSLLH:
25866 case ARM_BUILTIN_WSLLW:
25867 case ARM_BUILTIN_WSLLD:
25868 case ARM_BUILTIN_WSRAH:
25869 case ARM_BUILTIN_WSRAW:
25870 case ARM_BUILTIN_WSRAD:
25871 case ARM_BUILTIN_WRORH:
25872 case ARM_BUILTIN_WRORW:
25873 case ARM_BUILTIN_WRORD:
25874 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25875 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25876 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25877 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25878 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25879 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25880 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25881 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25882 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25883 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25884 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25885 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25886 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25887 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25888 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25889 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25890 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25891 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25892 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25893 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25894 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25895 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25896 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25897 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25898 : CODE_FOR_nothing);
25899 arg1 = CALL_EXPR_ARG (exp, 1);
25900 op1 = expand_normal (arg1);
25901 if (GET_MODE (op1) == VOIDmode)
25903 imm = INTVAL (op1);
25904 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25905 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25906 && (imm < 0 || imm > 32))
25908 if (fcode == ARM_BUILTIN_WRORHI)
25909 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25910 else if (fcode == ARM_BUILTIN_WRORWI)
25911 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25912 else if (fcode == ARM_BUILTIN_WRORH)
25913 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25914 else
25915 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25917 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25918 && (imm < 0 || imm > 64))
25920 if (fcode == ARM_BUILTIN_WRORDI)
25921 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25922 else
25923 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25925 else if (imm < 0)
25927 if (fcode == ARM_BUILTIN_WSRLHI)
25928 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25929 else if (fcode == ARM_BUILTIN_WSRLWI)
25930 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25931 else if (fcode == ARM_BUILTIN_WSRLDI)
25932 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25933 else if (fcode == ARM_BUILTIN_WSLLHI)
25934 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25935 else if (fcode == ARM_BUILTIN_WSLLWI)
25936 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25937 else if (fcode == ARM_BUILTIN_WSLLDI)
25938 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25939 else if (fcode == ARM_BUILTIN_WSRAHI)
25940 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25941 else if (fcode == ARM_BUILTIN_WSRAWI)
25942 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25943 else if (fcode == ARM_BUILTIN_WSRADI)
25944 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25945 else if (fcode == ARM_BUILTIN_WSRLH)
25946 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25947 else if (fcode == ARM_BUILTIN_WSRLW)
25948 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25949 else if (fcode == ARM_BUILTIN_WSRLD)
25950 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25951 else if (fcode == ARM_BUILTIN_WSLLH)
25952 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25953 else if (fcode == ARM_BUILTIN_WSLLW)
25954 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25955 else if (fcode == ARM_BUILTIN_WSLLD)
25956 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25957 else if (fcode == ARM_BUILTIN_WSRAH)
25958 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25959 else if (fcode == ARM_BUILTIN_WSRAW)
25960 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25961 else
25962 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25965 return arm_expand_binop_builtin (icode, exp, target);
25967 default:
25968 break;
25971 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25972 if (d->code == (const enum arm_builtins) fcode)
25973 return arm_expand_binop_builtin (d->icode, exp, target);
25975 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25976 if (d->code == (const enum arm_builtins) fcode)
25977 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25979 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25980 if (d->code == (const enum arm_builtins) fcode)
25981 return arm_expand_ternop_builtin (d->icode, exp, target);
25983 /* @@@ Should really do something sensible here. */
25984 return NULL_RTX;
25987 /* Return the number (counting from 0) of
25988 the least significant set bit in MASK. */
25990 inline static int
25991 number_of_first_bit_set (unsigned mask)
25993 return ctz_hwi (mask);
25996 /* Like emit_multi_reg_push, but allowing for a different set of
25997 registers to be described as saved. MASK is the set of registers
25998 to be saved; REAL_REGS is the set of registers to be described as
25999 saved. If REAL_REGS is 0, only describe the stack adjustment. */
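/* This is used, for example, when a high register is saved via a low
   work register: MASK then contains the low register that is actually
   pushed while REAL_REGS contains the high register whose value it
   holds, so the unwind information still describes the high register as
   saved. */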
26001 static rtx
26002 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26004 unsigned long regno;
26005 rtx par[10], tmp, reg, insn;
26006 int i, j;
26008 /* Build the parallel of the registers actually being stored. */
26009 for (i = 0; mask; ++i, mask &= mask - 1)
26011 regno = ctz_hwi (mask);
26012 reg = gen_rtx_REG (SImode, regno);
26014 if (i == 0)
26015 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26016 else
26017 tmp = gen_rtx_USE (VOIDmode, reg);
26019 par[i] = tmp;
26022 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26023 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26024 tmp = gen_frame_mem (BLKmode, tmp);
26025 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26026 par[0] = tmp;
26028 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26029 insn = emit_insn (tmp);
26031 /* Always build the stack adjustment note for unwind info. */
26032 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26033 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26034 par[0] = tmp;
26036 /* Build the parallel of the registers recorded as saved for unwind. */
26037 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26039 regno = ctz_hwi (real_regs);
26040 reg = gen_rtx_REG (SImode, regno);
26042 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26043 tmp = gen_frame_mem (SImode, tmp);
26044 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26045 RTX_FRAME_RELATED_P (tmp) = 1;
26046 par[j + 1] = tmp;
26049 if (j == 0)
26050 tmp = par[0];
26051 else
26053 RTX_FRAME_RELATED_P (par[0]) = 1;
26054 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26057 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26059 return insn;
26062 /* Emit code to pop registers from the stack. F is the
26063 assembly file. MASK is the set of registers to pop. */
26064 static void
26065 thumb_pop (FILE *f, unsigned long mask)
26067 int regno;
26068 int lo_mask = mask & 0xFF;
26069 int pushed_words = 0;
26071 gcc_assert (mask);
26073 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26075 /* Special case. Do not generate a POP PC statement here, do it in
26076 thumb_exit(). */
26077 thumb_exit (f, -1);
26078 return;
26081 fprintf (f, "\tpop\t{");
26083 /* Look at the low registers first. */
26084 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26086 if (lo_mask & 1)
26088 asm_fprintf (f, "%r", regno);
26090 if ((lo_mask & ~1) != 0)
26091 fprintf (f, ", ");
26093 pushed_words++;
26097 if (mask & (1 << PC_REGNUM))
26099 /* Catch popping the PC. */
26100 if (TARGET_INTERWORK || TARGET_BACKTRACE
26101 || crtl->calls_eh_return)
26103 /* The PC is never popped directly; instead
26104 it is popped into r3 and then BX is used. */
26105 fprintf (f, "}\n");
26107 thumb_exit (f, -1);
26109 return;
26111 else
26113 if (mask & 0xFF)
26114 fprintf (f, ", ");
26116 asm_fprintf (f, "%r", PC_REGNUM);
26120 fprintf (f, "}\n");
26123 /* Generate code to return from a thumb function.
26124 If 'reg_containing_return_addr' is -1, then the return address is
26125 actually on the stack, at the stack pointer. */
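/* The complexity below is a consequence of Thumb-1 restrictions: POP can
   only target the low registers (plus PC), and a return that needs
   interworking, a backtrace structure or an eh_return adjustment must end
   in BX, so the return address and any saved frame or stack pointer may
   have to be shuffled through the argument registers. */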
26126 static void
26127 thumb_exit (FILE *f, int reg_containing_return_addr)
26129 unsigned regs_available_for_popping;
26130 unsigned regs_to_pop;
26131 int pops_needed;
26132 unsigned available;
26133 unsigned required;
26134 enum machine_mode mode;
26135 int size;
26136 int restore_a4 = FALSE;
26138 /* Compute the registers we need to pop. */
26139 regs_to_pop = 0;
26140 pops_needed = 0;
26142 if (reg_containing_return_addr == -1)
26144 regs_to_pop |= 1 << LR_REGNUM;
26145 ++pops_needed;
26148 if (TARGET_BACKTRACE)
26150 /* Restore the (ARM) frame pointer and stack pointer. */
26151 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26152 pops_needed += 2;
26155 /* If there is nothing to pop then just emit the BX instruction and
26156 return. */
26157 if (pops_needed == 0)
26159 if (crtl->calls_eh_return)
26160 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26162 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26163 return;
26165 /* Otherwise if we are not supporting interworking and we have not created
26166 a backtrace structure and the function was not entered in ARM mode then
26167 just pop the return address straight into the PC. */
26168 else if (!TARGET_INTERWORK
26169 && !TARGET_BACKTRACE
26170 && !is_called_in_ARM_mode (current_function_decl)
26171 && !crtl->calls_eh_return)
26173 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26174 return;
26177 /* Find out how many of the (return) argument registers we can corrupt. */
26178 regs_available_for_popping = 0;
26180 /* If returning via __builtin_eh_return, the bottom three registers
26181 all contain information needed for the return. */
26182 if (crtl->calls_eh_return)
26183 size = 12;
26184 else
26186 /* We can deduce the registers used from the function's
26187 return value. This is more reliable than examining
26188 df_regs_ever_live_p () because that will be set if the register is
26189 ever used in the function, not just if the register is used
26190 to hold a return value. */
26192 if (crtl->return_rtx != 0)
26193 mode = GET_MODE (crtl->return_rtx);
26194 else
26195 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26197 size = GET_MODE_SIZE (mode);
26199 if (size == 0)
26201 /* In a void function we can use any argument register.
26202 In a function that returns a structure on the stack
26203 we can use the second and third argument registers. */
26204 if (mode == VOIDmode)
26205 regs_available_for_popping =
26206 (1 << ARG_REGISTER (1))
26207 | (1 << ARG_REGISTER (2))
26208 | (1 << ARG_REGISTER (3));
26209 else
26210 regs_available_for_popping =
26211 (1 << ARG_REGISTER (2))
26212 | (1 << ARG_REGISTER (3));
26214 else if (size <= 4)
26215 regs_available_for_popping =
26216 (1 << ARG_REGISTER (2))
26217 | (1 << ARG_REGISTER (3));
26218 else if (size <= 8)
26219 regs_available_for_popping =
26220 (1 << ARG_REGISTER (3));
26223 /* Match registers to be popped with registers into which we pop them. */
26224 for (available = regs_available_for_popping,
26225 required = regs_to_pop;
26226 required != 0 && available != 0;
26227 available &= ~(available & - available),
26228 required &= ~(required & - required))
26229 -- pops_needed;
26231 /* If we have any popping registers left over, remove them. */
26232 if (available > 0)
26233 regs_available_for_popping &= ~available;
26235 /* Otherwise if we need another popping register we can use
26236 the fourth argument register. */
26237 else if (pops_needed)
26239 /* If we have not found any free argument registers and
26240 reg a4 contains the return address, we must move it. */
26241 if (regs_available_for_popping == 0
26242 && reg_containing_return_addr == LAST_ARG_REGNUM)
26244 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26245 reg_containing_return_addr = LR_REGNUM;
26247 else if (size > 12)
26249 /* Register a4 is being used to hold part of the return value,
26250 but we have dire need of a free, low register. */
26251 restore_a4 = TRUE;
26253 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26256 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26258 /* The fourth argument register is available. */
26259 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26261 --pops_needed;
26265 /* Pop as many registers as we can. */
26266 thumb_pop (f, regs_available_for_popping);
26268 /* Process the registers we popped. */
26269 if (reg_containing_return_addr == -1)
26271 /* The return address was popped into the lowest numbered register. */
26272 regs_to_pop &= ~(1 << LR_REGNUM);
26274 reg_containing_return_addr =
26275 number_of_first_bit_set (regs_available_for_popping);
26277 /* Remove this register from the mask of available registers, so that
26278 the return address will not be corrupted by further pops. */
26279 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26282 /* If we popped other registers then handle them here. */
26283 if (regs_available_for_popping)
26285 int frame_pointer;
26287 /* Work out which register currently contains the frame pointer. */
26288 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26290 /* Move it into the correct place. */
26291 asm_fprintf (f, "\tmov\t%r, %r\n",
26292 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26294 /* (Temporarily) remove it from the mask of popped registers. */
26295 regs_available_for_popping &= ~(1 << frame_pointer);
26296 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26298 if (regs_available_for_popping)
26300 int stack_pointer;
26302 /* We popped the stack pointer as well,
26303 find the register that contains it. */
26304 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26306 /* Move it into the stack register. */
26307 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26309 /* At this point we have popped all necessary registers, so
26310 do not worry about restoring regs_available_for_popping
26311 to its correct value:
26313 assert (pops_needed == 0)
26314 assert (regs_available_for_popping == (1 << frame_pointer))
26315 assert (regs_to_pop == (1 << STACK_POINTER)) */
26317 else
26319 /* Since we have just moved the popped value into the frame
26320 pointer, the popping register is available for reuse, and
26321 we know that we still have the stack pointer left to pop. */
26322 regs_available_for_popping |= (1 << frame_pointer);
26326 /* If we still have registers left on the stack, but we no longer have
26327 any registers into which we can pop them, then we must move the return
26328 address into the link register and make available the register that
26329 contained it. */
26330 if (regs_available_for_popping == 0 && pops_needed > 0)
26332 regs_available_for_popping |= 1 << reg_containing_return_addr;
26334 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26335 reg_containing_return_addr);
26337 reg_containing_return_addr = LR_REGNUM;
26340 /* If we have registers left on the stack then pop some more.
26341 We know that at most we will want to pop FP and SP. */
26342 if (pops_needed > 0)
26344 int popped_into;
26345 int move_to;
26347 thumb_pop (f, regs_available_for_popping);
26349 /* We have popped either FP or SP.
26350 Move whichever one it is into the correct register. */
26351 popped_into = number_of_first_bit_set (regs_available_for_popping);
26352 move_to = number_of_first_bit_set (regs_to_pop);
26354 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26356 regs_to_pop &= ~(1 << move_to);
26358 --pops_needed;
26361 /* If we still have not popped everything then we must have only
26362 had one register available to us and we are now popping the SP. */
26363 if (pops_needed > 0)
26365 int popped_into;
26367 thumb_pop (f, regs_available_for_popping);
26369 popped_into = number_of_first_bit_set (regs_available_for_popping);
26371 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26373 assert (regs_to_pop == (1 << STACK_POINTER))
26374 assert (pops_needed == 1)
26378 /* If necessary restore the a4 register. */
26379 if (restore_a4)
26381 if (reg_containing_return_addr != LR_REGNUM)
26383 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26384 reg_containing_return_addr = LR_REGNUM;
26387 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26390 if (crtl->calls_eh_return)
26391 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26393 /* Return to caller. */
26394 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26397 /* Scan INSN just before assembler is output for it.
26398 For Thumb-1, we track the status of the condition codes; this
26399 information is used in the cbranchsi4_insn pattern. */
26400 void
26401 thumb1_final_prescan_insn (rtx insn)
26403 if (flag_print_asm_name)
26404 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26405 INSN_ADDRESSES (INSN_UID (insn)));
26406 /* Don't overwrite the previous setter when we get to a cbranch. */
26407 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26409 enum attr_conds conds;
26411 if (cfun->machine->thumb1_cc_insn)
26413 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26414 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26415 CC_STATUS_INIT;
26417 conds = get_attr_conds (insn);
26418 if (conds == CONDS_SET)
26420 rtx set = single_set (insn);
26421 cfun->machine->thumb1_cc_insn = insn;
26422 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26423 cfun->machine->thumb1_cc_op1 = const0_rtx;
26424 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26425 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26427 rtx src1 = XEXP (SET_SRC (set), 1);
26428 if (src1 == const0_rtx)
26429 cfun->machine->thumb1_cc_mode = CCmode;
26431 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26433 /* Record the src register operand instead of dest because
26434 the cprop_hardreg pass propagates src. */
26435 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26438 else if (conds != CONDS_NOCOND)
26439 cfun->machine->thumb1_cc_insn = NULL_RTX;
26442 /* Check if an unexpected far jump is used. */
26443 if (cfun->machine->lr_save_eliminated
26444 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26445 internal_error ("Unexpected thumb1 far jump");
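/* Return 1 if the set bits of VAL (viewed as a 32-bit value) all fall
   within a single 8-bit field shifted left by 0 to 24 bits; return 0
   otherwise, including for VAL == 0. */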
26449 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26451 unsigned HOST_WIDE_INT mask = 0xff;
26452 int i;
26454 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26455 if (val == 0) /* XXX */
26456 return 0;
26458 for (i = 0; i < 25; i++)
26459 if ((val & (mask << i)) == val)
26460 return 1;
26462 return 0;
26465 /* Returns nonzero if the current function contains,
26466 or might contain, a far jump. */
26467 static int
26468 thumb_far_jump_used_p (void)
26470 rtx insn;
26471 bool far_jump = false;
26472 unsigned int func_size = 0;
26474 /* This test is only important for leaf functions. */
26475 /* assert (!leaf_function_p ()); */
26477 /* If we have already decided that far jumps may be used,
26478 do not bother checking again, and always return true even if
26479 it turns out that they are not being used. Once we have made
26480 the decision that far jumps are present (and that hence the link
26481 register will be pushed onto the stack) we cannot go back on it. */
26482 if (cfun->machine->far_jump_used)
26483 return 1;
26485 /* If this function is not being called from the prologue/epilogue
26486 generation code then it must be being called from the
26487 INITIAL_ELIMINATION_OFFSET macro. */
26488 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26490 /* In this case we know that we are being asked about the elimination
26491 of the arg pointer register. If that register is not being used,
26492 then there are no arguments on the stack, and we do not have to
26493 worry that a far jump might force the prologue to push the link
26494 register, changing the stack offsets. In this case we can just
26495 return false, since the presence of far jumps in the function will
26496 not affect stack offsets.
26498 If the arg pointer is live (or if it was live, but has now been
26499 eliminated and so set to dead) then we do have to test to see if
26500 the function might contain a far jump. This test can lead to some
26501 false positives, since before reload is completed the length of
26502 branch instructions is not known, so gcc defaults to returning their
26503 longest length, which in turn sets the far jump attribute to true.
26505 A false positive will not result in bad code being generated, but it
26506 will result in a needless push and pop of the link register. We
26507 hope that this does not occur too often.
26509 If we need doubleword stack alignment this could affect the other
26510 elimination offsets so we can't risk getting it wrong. */
26511 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26512 cfun->machine->arg_pointer_live = 1;
26513 else if (!cfun->machine->arg_pointer_live)
26514 return 0;
26517 /* We should not change far_jump_used during or after reload, as there is
26518 no chance to change stack frame layout. */
26519 if (reload_in_progress || reload_completed)
26520 return 0;
26522 /* Check to see if the function contains a branch
26523 insn with the far jump attribute set. */
26524 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26526 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26528 far_jump = true;
26530 func_size += get_attr_length (insn);
26533 /* The far_jump attribute will always be true for thumb1 before the
26534 shorten_branch pass, so checking the far_jump attribute before
26535 that pass is not very useful.
26537 The following heuristic tries to estimate more accurately whether a far
26538 jump may finally be used. The heuristic is very conservative, as there
26539 is no chance to roll back a decision not to use a far jump.
26541 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26542 that each 2-byte insn is associated with a 4-byte constant pool. Using
26543 a function size of 2048/3 as the threshold is conservative enough. */
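/* Under that worst-case assumption, a function whose instructions total S
   bytes can occupy roughly 3*S bytes once constant pools are counted, so
   requiring 3*S < 2048 (S no more than about 682 bytes) keeps every branch
   within range; e.g. 700 bytes of instructions could expand to 2100 bytes,
   which already exceeds the roughly 2048-byte reach. */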
26544 if (far_jump)
26546 if ((func_size * 3) >= 2048)
26548 /* Record the fact that we have decided that
26549 the function does use far jumps. */
26550 cfun->machine->far_jump_used = 1;
26551 return 1;
26555 return 0;
26558 /* Return nonzero if FUNC must be entered in ARM mode. */
26560 is_called_in_ARM_mode (tree func)
26562 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26564 /* Ignore the problem about functions whose address is taken. */
26565 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26566 return TRUE;
26568 #ifdef ARM_PE
26569 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26570 #else
26571 return FALSE;
26572 #endif
26575 /* Given the stack offsets and register mask in OFFSETS, decide how
26576 many additional registers to push instead of subtracting a constant
26577 from SP. For epilogues the principle is the same except we use pop.
26578 FOR_PROLOGUE indicates which we're generating. */
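/* For example (illustrative numbers): the Thumb-1 "sub sp, #imm"
   encoding only accepts immediates up to 508, so a frame of exactly 512
   bytes would otherwise need the adjustment loaded into a register;
   pushing one extra dead or call-clobbered low register covers 4 of
   those bytes and lets the remaining 508 fit in the immediate, saving an
   instruction. */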
26579 static int
26580 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26582 HOST_WIDE_INT amount;
26583 unsigned long live_regs_mask = offsets->saved_regs_mask;
26584 /* Extract a mask of the ones we can give to the Thumb's push/pop
26585 instruction. */
26586 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26587 /* Then count how many other high registers will need to be pushed. */
26588 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26589 int n_free, reg_base, size;
26591 if (!for_prologue && frame_pointer_needed)
26592 amount = offsets->locals_base - offsets->saved_regs;
26593 else
26594 amount = offsets->outgoing_args - offsets->saved_regs;
26596 /* If the stack frame size is 512 exactly, we can save one load
26597 instruction, which should make this a win even when optimizing
26598 for speed. */
26599 if (!optimize_size && amount != 512)
26600 return 0;
26602 /* Can't do this if there are high registers to push. */
26603 if (high_regs_pushed != 0)
26604 return 0;
26606 /* Shouldn't do it in the prologue if no registers would normally
26607 be pushed at all. In the epilogue, also allow it if we'll have
26608 a pop insn for the PC. */
26609 if (l_mask == 0
26610 && (for_prologue
26611 || TARGET_BACKTRACE
26612 || (live_regs_mask & 1 << LR_REGNUM) == 0
26613 || TARGET_INTERWORK
26614 || crtl->args.pretend_args_size != 0))
26615 return 0;
26617 /* Don't do this if thumb_expand_prologue wants to emit instructions
26618 between the push and the stack frame allocation. */
26619 if (for_prologue
26620 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26621 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26622 return 0;
26624 reg_base = 0;
26625 n_free = 0;
26626 if (!for_prologue)
26628 size = arm_size_return_regs ();
26629 reg_base = ARM_NUM_INTS (size);
26630 live_regs_mask >>= reg_base;
26633 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26634 && (for_prologue || call_used_regs[reg_base + n_free]))
26636 live_regs_mask >>= 1;
26637 n_free++;
26640 if (n_free == 0)
26641 return 0;
26642 gcc_assert (amount / 4 * 4 == amount);
26644 if (amount >= 512 && (amount - n_free * 4) < 512)
26645 return (amount - 508) / 4;
26646 if (amount <= n_free * 4)
26647 return amount / 4;
26648 return 0;
26651 /* The bits which aren't usefully expanded as rtl. */
26652 const char *
26653 thumb1_unexpanded_epilogue (void)
26655 arm_stack_offsets *offsets;
26656 int regno;
26657 unsigned long live_regs_mask = 0;
26658 int high_regs_pushed = 0;
26659 int extra_pop;
26660 int had_to_push_lr;
26661 int size;
26663 if (cfun->machine->return_used_this_function != 0)
26664 return "";
26666 if (IS_NAKED (arm_current_func_type ()))
26667 return "";
26669 offsets = arm_get_frame_offsets ();
26670 live_regs_mask = offsets->saved_regs_mask;
26671 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26673 /* We can deduce the registers used from the function's return value.
26674 This is more reliable than examining df_regs_ever_live_p () because that
26675 will be set if the register is ever used in the function, not just if
26676 the register is used to hold a return value. */
26677 size = arm_size_return_regs ();
26679 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26680 if (extra_pop > 0)
26682 unsigned long extra_mask = (1 << extra_pop) - 1;
26683 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26686 /* The prologue may have pushed some high registers to use as
26687 work registers. E.g. the testsuite file:
26688 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26689 compiles to produce:
26690 push {r4, r5, r6, r7, lr}
26691 mov r7, r9
26692 mov r6, r8
26693 push {r6, r7}
26694 as part of the prologue. We have to undo that pushing here. */
26696 if (high_regs_pushed)
26698 unsigned long mask = live_regs_mask & 0xff;
26699 int next_hi_reg;
26701 /* The available low registers depend on the size of the value we are
26702 returning. */
26703 if (size <= 12)
26704 mask |= 1 << 3;
26705 if (size <= 8)
26706 mask |= 1 << 2;
26708 if (mask == 0)
26709 /* Oh dear! We have no low registers into which we can pop
26710 high registers! */
26711 internal_error
26712 ("no low registers available for popping high registers");
26714 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26715 if (live_regs_mask & (1 << next_hi_reg))
26716 break;
26718 while (high_regs_pushed)
26720 /* Find lo register(s) into which the high register(s) can
26721 be popped. */
26722 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26724 if (mask & (1 << regno))
26725 high_regs_pushed--;
26726 if (high_regs_pushed == 0)
26727 break;
26730 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26732 /* Pop the values into the low register(s). */
26733 thumb_pop (asm_out_file, mask);
26735 /* Move the value(s) into the high registers. */
26736 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26738 if (mask & (1 << regno))
26740 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26741 regno);
26743 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26744 if (live_regs_mask & (1 << next_hi_reg))
26745 break;
26749 live_regs_mask &= ~0x0f00;
26752 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26753 live_regs_mask &= 0xff;
26755 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26757 /* Pop the return address into the PC. */
26758 if (had_to_push_lr)
26759 live_regs_mask |= 1 << PC_REGNUM;
26761 /* Either no argument registers were pushed or a backtrace
26762 structure was created which includes an adjusted stack
26763 pointer, so just pop everything. */
26764 if (live_regs_mask)
26765 thumb_pop (asm_out_file, live_regs_mask);
26767 /* We have either just popped the return address into the
26768 PC or it was kept in LR for the entire function.
26769 Note that thumb_pop has already called thumb_exit if the
26770 PC was in the list. */
26771 if (!had_to_push_lr)
26772 thumb_exit (asm_out_file, LR_REGNUM);
26774 else
26776 /* Pop everything but the return address. */
26777 if (live_regs_mask)
26778 thumb_pop (asm_out_file, live_regs_mask);
26780 if (had_to_push_lr)
26782 if (size > 12)
26784 /* We have no free low regs, so save one. */
26785 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26786 LAST_ARG_REGNUM);
26789 /* Get the return address into a temporary register. */
26790 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26792 if (size > 12)
26794 /* Move the return address to lr. */
26795 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26796 LAST_ARG_REGNUM);
26797 /* Restore the low register. */
26798 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26799 IP_REGNUM);
26800 regno = LR_REGNUM;
26802 else
26803 regno = LAST_ARG_REGNUM;
26805 else
26806 regno = LR_REGNUM;
26808 /* Remove the argument registers that were pushed onto the stack. */
26809 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26810 SP_REGNUM, SP_REGNUM,
26811 crtl->args.pretend_args_size);
26813 thumb_exit (asm_out_file, regno);
26816 return "";
26819 /* Functions to save and restore machine-specific function data. */
26820 static struct machine_function *
26821 arm_init_machine_status (void)
26823 struct machine_function *machine;
26824 machine = ggc_alloc_cleared_machine_function ();
26826 #if ARM_FT_UNKNOWN != 0
26827 machine->func_type = ARM_FT_UNKNOWN;
26828 #endif
26829 return machine;
26832 /* Return an RTX indicating where the return address to the
26833 calling function can be found. */
26835 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26837 if (count != 0)
26838 return NULL_RTX;
26840 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26843 /* Do anything needed before RTL is emitted for each function. */
26844 void
26845 arm_init_expanders (void)
26847 /* Arrange to initialize and mark the machine per-function status. */
26848 init_machine_status = arm_init_machine_status;
26850 /* This is to stop the combine pass optimizing away the alignment
26851 adjustment of va_arg. */
26852 /* ??? It is claimed that this should not be necessary. */
26853 if (cfun)
26854 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26858 /* Like arm_compute_initial_elimination_offset. Simpler because there
26859 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26860 to point at the base of the local variables after static stack
26861 space for a function has been allocated. */
26863 HOST_WIDE_INT
26864 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26866 arm_stack_offsets *offsets;
26868 offsets = arm_get_frame_offsets ();
26870 switch (from)
26872 case ARG_POINTER_REGNUM:
26873 switch (to)
26875 case STACK_POINTER_REGNUM:
26876 return offsets->outgoing_args - offsets->saved_args;
26878 case FRAME_POINTER_REGNUM:
26879 return offsets->soft_frame - offsets->saved_args;
26881 case ARM_HARD_FRAME_POINTER_REGNUM:
26882 return offsets->saved_regs - offsets->saved_args;
26884 case THUMB_HARD_FRAME_POINTER_REGNUM:
26885 return offsets->locals_base - offsets->saved_args;
26887 default:
26888 gcc_unreachable ();
26890 break;
26892 case FRAME_POINTER_REGNUM:
26893 switch (to)
26895 case STACK_POINTER_REGNUM:
26896 return offsets->outgoing_args - offsets->soft_frame;
26898 case ARM_HARD_FRAME_POINTER_REGNUM:
26899 return offsets->saved_regs - offsets->soft_frame;
26901 case THUMB_HARD_FRAME_POINTER_REGNUM:
26902 return offsets->locals_base - offsets->soft_frame;
26904 default:
26905 gcc_unreachable ();
26907 break;
26909 default:
26910 gcc_unreachable ();
26914 /* Generate the function's prologue. */
26916 void
26917 thumb1_expand_prologue (void)
26919 rtx insn;
26921 HOST_WIDE_INT amount;
26922 arm_stack_offsets *offsets;
26923 unsigned long func_type;
26924 int regno;
26925 unsigned long live_regs_mask;
26926 unsigned long l_mask;
26927 unsigned high_regs_pushed = 0;
26929 func_type = arm_current_func_type ();
26931 /* Naked functions don't have prologues. */
26932 if (IS_NAKED (func_type))
26933 return;
26935 if (IS_INTERRUPT (func_type))
26937 error ("interrupt Service Routines cannot be coded in Thumb mode");
26938 return;
26941 if (is_called_in_ARM_mode (current_function_decl))
26942 emit_insn (gen_prologue_thumb1_interwork ());
26944 offsets = arm_get_frame_offsets ();
26945 live_regs_mask = offsets->saved_regs_mask;
26947 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26948 l_mask = live_regs_mask & 0x40ff;
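/* 0x40ff covers r0-r7 (bits 0-7) and LR (bit 14), the registers a
   Thumb-1 PUSH can encode.  */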
26949 /* Then count how many other high registers will need to be pushed. */
26950 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26952 if (crtl->args.pretend_args_size)
26954 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26956 if (cfun->machine->uses_anonymous_args)
26958 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26959 unsigned long mask;
26961 mask = 1ul << (LAST_ARG_REGNUM + 1);
26962 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
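/* E.g. if two argument registers need pushing, this yields the mask for
   the top two argument registers, {r2, r3}.  */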
26964 insn = thumb1_emit_multi_reg_push (mask, 0);
26966 else
26968 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26969 stack_pointer_rtx, x));
26971 RTX_FRAME_RELATED_P (insn) = 1;
26974 if (TARGET_BACKTRACE)
26976 HOST_WIDE_INT offset = 0;
26977 unsigned work_register;
26978 rtx work_reg, x, arm_hfp_rtx;
26980 /* We have been asked to create a stack backtrace structure.
26981 The code looks like this:
26983 0 .align 2
26984 0 func:
26985 0 sub SP, #16 Reserve space for 4 registers.
26986 2 push {R7} Push low registers.
26987 4 add R7, SP, #20 Get the stack pointer before the push.
26988 6 str R7, [SP, #8] Store the stack pointer
26989 (before reserving the space).
26990 8 mov R7, PC Get hold of the start of this code + 12.
26991 10 str R7, [SP, #16] Store it.
26992 12 mov R7, FP Get hold of the current frame pointer.
26993 14 str R7, [SP, #4] Store it.
26994 16 mov R7, LR Get hold of the current return address.
26995 18 str R7, [SP, #12] Store it.
26996 20 add R7, SP, #16 Point at the start of the
26997 backtrace structure.
26998 22 mov FP, R7 Put this value into the frame pointer. */
27000 work_register = thumb_find_work_register (live_regs_mask);
27001 work_reg = gen_rtx_REG (SImode, work_register);
27002 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27004 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27005 stack_pointer_rtx, GEN_INT (-16)));
27006 RTX_FRAME_RELATED_P (insn) = 1;
27008 if (l_mask)
27010 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27011 RTX_FRAME_RELATED_P (insn) = 1;
27013 offset = bit_count (l_mask) * UNITS_PER_WORD;
27016 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27017 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27019 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27020 x = gen_frame_mem (SImode, x);
27021 emit_move_insn (x, work_reg);
27023 /* Make sure that the instruction fetching the PC is in the right place
27024 to calculate "start of backtrace creation code + 12". */
27025 /* ??? The stores using the common WORK_REG ought to be enough to
27026 prevent the scheduler from doing anything weird. Failing that
27027 we could always move all of the following into an UNSPEC_VOLATILE. */
27028 if (l_mask)
27030 x = gen_rtx_REG (SImode, PC_REGNUM);
27031 emit_move_insn (work_reg, x);
27033 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27034 x = gen_frame_mem (SImode, x);
27035 emit_move_insn (x, work_reg);
27037 emit_move_insn (work_reg, arm_hfp_rtx);
27039 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27040 x = gen_frame_mem (SImode, x);
27041 emit_move_insn (x, work_reg);
27043 else
27045 emit_move_insn (work_reg, arm_hfp_rtx);
27047 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27048 x = gen_frame_mem (SImode, x);
27049 emit_move_insn (x, work_reg);
27051 x = gen_rtx_REG (SImode, PC_REGNUM);
27052 emit_move_insn (work_reg, x);
27054 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27055 x = gen_frame_mem (SImode, x);
27056 emit_move_insn (x, work_reg);
27059 x = gen_rtx_REG (SImode, LR_REGNUM);
27060 emit_move_insn (work_reg, x);
27062 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27063 x = gen_frame_mem (SImode, x);
27064 emit_move_insn (x, work_reg);
27066 x = GEN_INT (offset + 12);
27067 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27069 emit_move_insn (arm_hfp_rtx, work_reg);
27071 /* Optimization: If we are not pushing any low registers but we are going
27072 to push some high registers then delay our first push. This will just
27073 be a push of LR and we can combine it with the push of the first high
27074 register. */
27075 else if ((l_mask & 0xff) != 0
27076 || (high_regs_pushed == 0 && l_mask))
27078 unsigned long mask = l_mask;
27079 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27080 insn = thumb1_emit_multi_reg_push (mask, mask);
27081 RTX_FRAME_RELATED_P (insn) = 1;
27084 if (high_regs_pushed)
27086 unsigned pushable_regs;
27087 unsigned next_hi_reg;
27088 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27089 : crtl->args.info.nregs;
27090 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27092 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27093 if (live_regs_mask & (1 << next_hi_reg))
27094 break;
27096 /* Here we need to mask out registers used for passing arguments, even
27097 if they could otherwise be pushed. This avoids using them to stash the high
27098 registers; such a stash could clobber argument values that are still live. */
27099 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27101 if (pushable_regs == 0)
27102 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27104 while (high_regs_pushed > 0)
27106 unsigned long real_regs_mask = 0;
27108 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27110 if (pushable_regs & (1 << regno))
27112 emit_move_insn (gen_rtx_REG (SImode, regno),
27113 gen_rtx_REG (SImode, next_hi_reg));
27115 high_regs_pushed --;
27116 real_regs_mask |= (1 << next_hi_reg);
27118 if (high_regs_pushed)
27120 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27121 next_hi_reg --)
27122 if (live_regs_mask & (1 << next_hi_reg))
27123 break;
27125 else
27127 pushable_regs &= ~((1 << regno) - 1);
27128 break;
27133 /* If we had to find a work register and we have not yet
27134 saved the LR then add it to the list of regs to push. */
27135 if (l_mask == (1 << LR_REGNUM))
27137 pushable_regs |= l_mask;
27138 real_regs_mask |= l_mask;
27139 l_mask = 0;
27142 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27143 RTX_FRAME_RELATED_P (insn) = 1;
27147 /* Load the pic register before setting the frame pointer,
27148 so we can use r7 as a temporary work register. */
27149 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27150 arm_load_pic_register (live_regs_mask);
27152 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27153 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27154 stack_pointer_rtx);
27156 if (flag_stack_usage_info)
27157 current_function_static_stack_size
27158 = offsets->outgoing_args - offsets->saved_args;
27160 amount = offsets->outgoing_args - offsets->saved_regs;
27161 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27162 if (amount)
27164 if (amount < 512)
27166 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27167 GEN_INT (- amount)));
27168 RTX_FRAME_RELATED_P (insn) = 1;
27170 else
27172 rtx reg, dwarf;
27174 /* The stack decrement is too big for an immediate value in a single
27175 insn. In theory we could issue multiple subtracts, but after
27176 three of them it becomes more space efficient to place the full
27177 value in the constant pool and load into a register. (Also the
27178 ARM debugger really likes to see only one stack decrement per
27179 function). So instead we look for a scratch register into which
27180 we can load the decrement, and then we subtract this from the
27181 stack pointer. Unfortunately on the thumb the only available
27182 scratch registers are the argument registers, and we cannot use
27183 these as they may hold arguments to the function. Instead we
27184 attempt to locate a call preserved register which is used by this
27185 function. If we can find one, then we know that it will have
27186 been pushed at the start of the prologue and so we can corrupt
27187 it now. */
27188 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27189 if (live_regs_mask & (1 << regno))
27190 break;
27192 gcc_assert(regno <= LAST_LO_REGNUM);
27194 reg = gen_rtx_REG (SImode, regno);
27196 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27198 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27199 stack_pointer_rtx, reg));
27201 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27202 plus_constant (Pmode, stack_pointer_rtx,
27203 -amount));
27204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27205 RTX_FRAME_RELATED_P (insn) = 1;
27209 if (frame_pointer_needed)
27210 thumb_set_frame_pointer (offsets);
27212 /* If we are profiling, make sure no instructions are scheduled before
27213 the call to mcount. Similarly if the user has requested no
27214 scheduling in the prolog. Similarly if we want non-call exceptions
27215 using the EABI unwinder, to prevent faulting instructions from being
27216 swapped with a stack adjustment. */
27217 if (crtl->profile || !TARGET_SCHED_PROLOG
27218 || (arm_except_unwind_info (&global_options) == UI_TARGET
27219 && cfun->can_throw_non_call_exceptions))
27220 emit_insn (gen_blockage ());
27222 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27223 if (live_regs_mask & 0xff)
27224 cfun->machine->lr_save_eliminated = 0;
27227 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27228 POP instruction can be generated. LR should be replaced by PC. All
27229 the checks required are already done by USE_RETURN_INSN (). Hence,
27230 all we really need to check here is whether a single register or
27231 multiple registers are to be popped. */
27232 void
27233 thumb2_expand_return (bool simple_return)
27235 int i, num_regs;
27236 unsigned long saved_regs_mask;
27237 arm_stack_offsets *offsets;
27239 offsets = arm_get_frame_offsets ();
27240 saved_regs_mask = offsets->saved_regs_mask;
27242 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27243 if (saved_regs_mask & (1 << i))
27244 num_regs++;
27246 if (!simple_return && saved_regs_mask)
27248 if (num_regs == 1)
27250 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27251 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27252 rtx addr = gen_rtx_MEM (SImode,
27253 gen_rtx_POST_INC (SImode,
27254 stack_pointer_rtx));
27255 set_mem_alias_set (addr, get_frame_alias_set ());
27256 XVECEXP (par, 0, 0) = ret_rtx;
27257 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27258 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27259 emit_jump_insn (par);
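/* Semantically this pops the saved return address straight into the PC
   with a post-incremented SP, e.g. a single "ldr pc, [sp], #4".  */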
27261 else
27263 saved_regs_mask &= ~ (1 << LR_REGNUM);
27264 saved_regs_mask |= (1 << PC_REGNUM);
27265 arm_emit_multi_reg_pop (saved_regs_mask);
27268 else
27270 emit_jump_insn (simple_return_rtx);
27274 void
27275 thumb1_expand_epilogue (void)
27277 HOST_WIDE_INT amount;
27278 arm_stack_offsets *offsets;
27279 int regno;
27281 /* Naked functions don't have epilogues. */
27282 if (IS_NAKED (arm_current_func_type ()))
27283 return;
27285 offsets = arm_get_frame_offsets ();
27286 amount = offsets->outgoing_args - offsets->saved_regs;
27288 if (frame_pointer_needed)
27290 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27291 amount = offsets->locals_base - offsets->saved_regs;
27293 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27295 gcc_assert (amount >= 0);
27296 if (amount)
27298 emit_insn (gen_blockage ());
27300 if (amount < 512)
27301 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27302 GEN_INT (amount)));
27303 else
27305 /* r3 is always free in the epilogue. */
27306 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27308 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27309 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27313 /* Emit a USE (stack_pointer_rtx), so that
27314 the stack adjustment will not be deleted. */
27315 emit_insn (gen_force_register_use (stack_pointer_rtx));
27317 if (crtl->profile || !TARGET_SCHED_PROLOG)
27318 emit_insn (gen_blockage ());
27320 /* Emit a clobber for each insn that will be restored in the epilogue,
27321 so that flow2 will get register lifetimes correct. */
27322 for (regno = 0; regno < 13; regno++)
27323 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27324 emit_clobber (gen_rtx_REG (SImode, regno));
27326 if (! df_regs_ever_live_p (LR_REGNUM))
27327 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27330 /* Epilogue code for APCS frame. */
27331 static void
27332 arm_expand_epilogue_apcs_frame (bool really_return)
27334 unsigned long func_type;
27335 unsigned long saved_regs_mask;
27336 int num_regs = 0;
27337 int i;
27338 int floats_from_frame = 0;
27339 arm_stack_offsets *offsets;
27341 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27342 func_type = arm_current_func_type ();
27344 /* Get frame offsets for ARM. */
27345 offsets = arm_get_frame_offsets ();
27346 saved_regs_mask = offsets->saved_regs_mask;
27348 /* Find the offset of the floating-point save area in the frame. */
27349 floats_from_frame
27350 = (offsets->saved_args
27351 + arm_compute_static_chain_stack_bytes ()
27352 - offsets->frame);
27354 /* Compute how many core registers saved and how far away the floats are. */
27355 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27356 if (saved_regs_mask & (1 << i))
27358 num_regs++;
27359 floats_from_frame += 4;
27362 if (TARGET_HARD_FLOAT && TARGET_VFP)
27364 int start_reg;
27365 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27367 /* The offset is from IP_REGNUM. */
27368 int saved_size = arm_get_vfp_saved_size ();
27369 if (saved_size > 0)
27371 rtx insn;
27372 floats_from_frame += saved_size;
27373 insn = emit_insn (gen_addsi3 (ip_rtx,
27374 hard_frame_pointer_rtx,
27375 GEN_INT (-floats_from_frame)));
27376 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27377 ip_rtx, hard_frame_pointer_rtx);
27380 /* Generate VFP register multi-pop. */
27381 start_reg = FIRST_VFP_REGNUM;
27383 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27384 /* Look for a case where a reg does not need restoring. */
27385 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27386 && (!df_regs_ever_live_p (i + 1)
27387 || call_used_regs[i + 1]))
27389 if (start_reg != i)
27390 arm_emit_vfp_multi_reg_pop (start_reg,
27391 (i - start_reg) / 2,
27392 gen_rtx_REG (SImode,
27393 IP_REGNUM));
27394 start_reg = i + 2;
27397 /* Restore the remaining regs that we have discovered (or possibly
27398 even all of them, if the conditional in the for loop never
27399 fired). */
27400 if (start_reg != i)
27401 arm_emit_vfp_multi_reg_pop (start_reg,
27402 (i - start_reg) / 2,
27403 gen_rtx_REG (SImode, IP_REGNUM));
27406 if (TARGET_IWMMXT)
27408 /* The frame pointer is guaranteed to be non-double-word aligned, as
27409 it is set to the double-word-aligned old_stack_pointer - 4. */
27410 rtx insn;
27411 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27413 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27414 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27416 rtx addr = gen_frame_mem (V2SImode,
27417 plus_constant (Pmode, hard_frame_pointer_rtx,
27418 - lrm_count * 4));
27419 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27420 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27421 gen_rtx_REG (V2SImode, i),
27422 NULL_RTX);
27423 lrm_count += 2;
27427 /* saved_regs_mask should contain IP, which holds the old stack pointer
27428 saved when the activation record was created. Since SP and IP are adjacent
27429 registers, we can restore the value directly into SP. */
27430 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27431 saved_regs_mask &= ~(1 << IP_REGNUM);
27432 saved_regs_mask |= (1 << SP_REGNUM);
27434 /* There are two registers left in saved_regs_mask - LR and PC. We
27435 only need to restore LR (the return address), but to
27436 save time we can load it directly into PC, unless we need a
27437 special function exit sequence, or we are not really returning. */
27438 if (really_return
27439 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27440 && !crtl->calls_eh_return)
27441 /* Delete LR from the register mask, so that LR on
27442 the stack is loaded into the PC in the register mask. */
27443 saved_regs_mask &= ~(1 << LR_REGNUM);
27444 else
27445 saved_regs_mask &= ~(1 << PC_REGNUM);
27447 num_regs = bit_count (saved_regs_mask);
27448 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27450 rtx insn;
27451 emit_insn (gen_blockage ());
27452 /* Unwind the stack to just below the saved registers. */
27453 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27454 hard_frame_pointer_rtx,
27455 GEN_INT (- 4 * num_regs)));
27457 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27458 stack_pointer_rtx, hard_frame_pointer_rtx);
27461 arm_emit_multi_reg_pop (saved_regs_mask);
27463 if (IS_INTERRUPT (func_type))
27465 /* Interrupt handlers will have pushed the
27466 IP onto the stack, so restore it now. */
27467 rtx insn;
27468 rtx addr = gen_rtx_MEM (SImode,
27469 gen_rtx_POST_INC (SImode,
27470 stack_pointer_rtx));
27471 set_mem_alias_set (addr, get_frame_alias_set ());
27472 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27473 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27474 gen_rtx_REG (SImode, IP_REGNUM),
27475 NULL_RTX);
27478 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27479 return;
27481 if (crtl->calls_eh_return)
27482 emit_insn (gen_addsi3 (stack_pointer_rtx,
27483 stack_pointer_rtx,
27484 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27486 if (IS_STACKALIGN (func_type))
27487 /* Restore the original stack pointer. Before prologue, the stack was
27488 realigned and the original stack pointer saved in r0. For details,
27489 see comment in arm_expand_prologue. */
27490 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27492 emit_jump_insn (simple_return_rtx);
27495 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27496 function is not a sibcall. */
27497 void
27498 arm_expand_epilogue (bool really_return)
27500 unsigned long func_type;
27501 unsigned long saved_regs_mask;
27502 int num_regs = 0;
27503 int i;
27504 int amount;
27505 arm_stack_offsets *offsets;
27507 func_type = arm_current_func_type ();
27509 /* Naked functions don't have an epilogue. Hence, generate the return pattern
27510 and let output_return_instruction take care of any instruction emission. */
27511 if (IS_NAKED (func_type)
27512 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27514 if (really_return)
27515 emit_jump_insn (simple_return_rtx);
27516 return;
27519 /* If we are throwing an exception, then we really must be doing a
27520 return, so we can't tail-call. */
27521 gcc_assert (!crtl->calls_eh_return || really_return);
27523 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27525 arm_expand_epilogue_apcs_frame (really_return);
27526 return;
27529 /* Get frame offsets for ARM. */
27530 offsets = arm_get_frame_offsets ();
27531 saved_regs_mask = offsets->saved_regs_mask;
27532 num_regs = bit_count (saved_regs_mask);
27534 if (frame_pointer_needed)
27536 rtx insn;
27537 /* Restore stack pointer if necessary. */
27538 if (TARGET_ARM)
27540 /* In ARM mode, the frame pointer points to the first saved register.
27541 Restore the stack pointer to the last saved register. */
27542 amount = offsets->frame - offsets->saved_regs;
27544 /* Force out any pending memory operations that reference stacked data
27545 before stack de-allocation occurs. */
27546 emit_insn (gen_blockage ());
27547 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27548 hard_frame_pointer_rtx,
27549 GEN_INT (amount)));
27550 arm_add_cfa_adjust_cfa_note (insn, amount,
27551 stack_pointer_rtx,
27552 hard_frame_pointer_rtx);
27554 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27555 deleted. */
27556 emit_insn (gen_force_register_use (stack_pointer_rtx));
27558 else
27560 /* In Thumb-2 mode, the frame pointer points to the last saved
27561 register. */
27562 amount = offsets->locals_base - offsets->saved_regs;
27563 if (amount)
27565 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27566 hard_frame_pointer_rtx,
27567 GEN_INT (amount)));
27568 arm_add_cfa_adjust_cfa_note (insn, amount,
27569 hard_frame_pointer_rtx,
27570 hard_frame_pointer_rtx);
27573 /* Force out any pending memory operations that reference stacked data
27574 before stack de-allocation occurs. */
27575 emit_insn (gen_blockage ());
27576 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27577 hard_frame_pointer_rtx));
27578 arm_add_cfa_adjust_cfa_note (insn, 0,
27579 stack_pointer_rtx,
27580 hard_frame_pointer_rtx);
27581 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27582 deleted. */
27583 emit_insn (gen_force_register_use (stack_pointer_rtx));
27586 else
27588 /* Pop off outgoing args and local frame to adjust stack pointer to
27589 last saved register. */
27590 amount = offsets->outgoing_args - offsets->saved_regs;
27591 if (amount)
27593 rtx tmp;
27594 /* Force out any pending memory operations that reference stacked data
27595 before stack de-allocation occurs. */
27596 emit_insn (gen_blockage ());
27597 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27598 stack_pointer_rtx,
27599 GEN_INT (amount)));
27600 arm_add_cfa_adjust_cfa_note (tmp, amount,
27601 stack_pointer_rtx, stack_pointer_rtx);
27602 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27603 not deleted. */
27604 emit_insn (gen_force_register_use (stack_pointer_rtx));
27608 if (TARGET_HARD_FLOAT && TARGET_VFP)
27610 /* Generate VFP register multi-pop. */
27611 int end_reg = LAST_VFP_REGNUM + 1;
27613 /* Scan the registers in reverse order. We need to match
27614 any groupings made in the prologue and generate matching
27615 vldm operations. Groups must be matched because, unlike pop,
27616 vldm can only restore consecutive registers. */
27617 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27618 /* Look for a case where a reg does not need restoring. */
27619 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27620 && (!df_regs_ever_live_p (i + 1)
27621 || call_used_regs[i + 1]))
27623 /* Restore the regs discovered so far (from reg+2 to
27624 end_reg). */
27625 if (end_reg > i + 2)
27626 arm_emit_vfp_multi_reg_pop (i + 2,
27627 (end_reg - (i + 2)) / 2,
27628 stack_pointer_rtx);
27629 end_reg = i;
27632 /* Restore the remaining regs that we have discovered (or possibly
27633 even all of them, if the conditional in the for loop never
27634 fired). */
27635 if (end_reg > i + 2)
27636 arm_emit_vfp_multi_reg_pop (i + 2,
27637 (end_reg - (i + 2)) / 2,
27638 stack_pointer_rtx);
27641 if (TARGET_IWMMXT)
27642 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27643 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27645 rtx insn;
27646 rtx addr = gen_rtx_MEM (V2SImode,
27647 gen_rtx_POST_INC (SImode,
27648 stack_pointer_rtx));
27649 set_mem_alias_set (addr, get_frame_alias_set ());
27650 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27651 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27652 gen_rtx_REG (V2SImode, i),
27653 NULL_RTX);
27654 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27655 stack_pointer_rtx, stack_pointer_rtx);
27658 if (saved_regs_mask)
27660 rtx insn;
27661 bool return_in_pc = false;
27663 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27664 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27665 && !IS_STACKALIGN (func_type)
27666 && really_return
27667 && crtl->args.pretend_args_size == 0
27668 && saved_regs_mask & (1 << LR_REGNUM)
27669 && !crtl->calls_eh_return)
27671 saved_regs_mask &= ~(1 << LR_REGNUM);
27672 saved_regs_mask |= (1 << PC_REGNUM);
27673 return_in_pc = true;
27676 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27678 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27679 if (saved_regs_mask & (1 << i))
27681 rtx addr = gen_rtx_MEM (SImode,
27682 gen_rtx_POST_INC (SImode,
27683 stack_pointer_rtx));
27684 set_mem_alias_set (addr, get_frame_alias_set ());
27686 if (i == PC_REGNUM)
27688 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27689 XVECEXP (insn, 0, 0) = ret_rtx;
27690 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27691 gen_rtx_REG (SImode, i),
27692 addr);
27693 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27694 insn = emit_jump_insn (insn);
27696 else
27698 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27699 addr));
27700 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27701 gen_rtx_REG (SImode, i),
27702 NULL_RTX);
27703 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27704 stack_pointer_rtx,
27705 stack_pointer_rtx);
27709 else
27711 if (TARGET_LDRD
27712 && current_tune->prefer_ldrd_strd
27713 && !optimize_function_for_size_p (cfun))
27715 if (TARGET_THUMB2)
27716 thumb2_emit_ldrd_pop (saved_regs_mask);
27717 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27718 arm_emit_ldrd_pop (saved_regs_mask);
27719 else
27720 arm_emit_multi_reg_pop (saved_regs_mask);
27722 else
27723 arm_emit_multi_reg_pop (saved_regs_mask);
27726 if (return_in_pc == true)
27727 return;
27730 if (crtl->args.pretend_args_size)
27732 int i, j;
27733 rtx dwarf = NULL_RTX;
27734 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27735 stack_pointer_rtx,
27736 GEN_INT (crtl->args.pretend_args_size)));
27738 RTX_FRAME_RELATED_P (tmp) = 1;
27740 if (cfun->machine->uses_anonymous_args)
27742 /* Restore pretend args. Refer to arm_expand_prologue for how
27743 pretend_args are saved on the stack. */
27744 int num_regs = crtl->args.pretend_args_size / 4;
27745 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
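/* E.g. if two words of pretend args were pushed, this selects {r2, r3}.  */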
27746 for (j = 0, i = 0; j < num_regs; i++)
27747 if (saved_regs_mask & (1 << i))
27749 rtx reg = gen_rtx_REG (SImode, i);
27750 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27751 j++;
27753 REG_NOTES (tmp) = dwarf;
27755 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27756 stack_pointer_rtx, stack_pointer_rtx);
27759 if (!really_return)
27760 return;
27762 if (crtl->calls_eh_return)
27763 emit_insn (gen_addsi3 (stack_pointer_rtx,
27764 stack_pointer_rtx,
27765 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27767 if (IS_STACKALIGN (func_type))
27768 /* Restore the original stack pointer. Before prologue, the stack was
27769 realigned and the original stack pointer saved in r0. For details,
27770 see comment in arm_expand_prologue. */
27771 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27773 emit_jump_insn (simple_return_rtx);
27776 /* Implementation of insn prologue_thumb1_interwork. This is the first
27777 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27779 const char *
27780 thumb1_output_interwork (void)
27782 const char * name;
27783 FILE *f = asm_out_file;
27785 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27786 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27787 == SYMBOL_REF);
27788 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27790 /* Generate code sequence to switch us into Thumb mode. */
27791 /* The .code 32 directive has already been emitted by
27792 ASM_DECLARE_FUNCTION_NAME. */
27793 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27794 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27796 /* Generate a label, so that the debugger will notice the
27797 change in instruction sets. This label is also used by
27798 the assembler to bypass the ARM code when this function
27799 is called from a Thumb encoded function elsewhere in the
27800 same file. Hence the definition of STUB_NAME here must
27801 agree with the definition in gas/config/tc-arm.c. */
27803 #define STUB_NAME ".real_start_of"
27805 fprintf (f, "\t.code\t16\n");
27806 #ifdef ARM_PE
27807 if (arm_dllexport_name_p (name))
27808 name = arm_strip_name_encoding (name);
27809 #endif
27810 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27811 fprintf (f, "\t.thumb_func\n");
27812 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27814 return "";
27817 /* Handle the case of a double word load into a low register from
27818 a computed memory address. The computed address may involve a
27819 register which is overwritten by the load. */
27820 const char *
27821 thumb_load_double_from_address (rtx *operands)
27823 rtx addr;
27824 rtx base;
27825 rtx offset;
27826 rtx arg1;
27827 rtx arg2;
27829 gcc_assert (REG_P (operands[0]));
27830 gcc_assert (MEM_P (operands[1]));
27832 /* Get the memory address. */
27833 addr = XEXP (operands[1], 0);
27835 /* Work out how the memory address is computed. */
27836 switch (GET_CODE (addr))
27838 case REG:
27839 operands[2] = adjust_address (operands[1], SImode, 4);
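/* If the address register doubles as the low destination register, load
   the high word first so the address is not clobbered before the second
   load.  */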
27841 if (REGNO (operands[0]) == REGNO (addr))
27843 output_asm_insn ("ldr\t%H0, %2", operands);
27844 output_asm_insn ("ldr\t%0, %1", operands);
27846 else
27848 output_asm_insn ("ldr\t%0, %1", operands);
27849 output_asm_insn ("ldr\t%H0, %2", operands);
27851 break;
27853 case CONST:
27854 /* Compute <address> + 4 for the high order load. */
27855 operands[2] = adjust_address (operands[1], SImode, 4);
27857 output_asm_insn ("ldr\t%0, %1", operands);
27858 output_asm_insn ("ldr\t%H0, %2", operands);
27859 break;
27861 case PLUS:
27862 arg1 = XEXP (addr, 0);
27863 arg2 = XEXP (addr, 1);
27865 if (CONSTANT_P (arg1))
27866 base = arg2, offset = arg1;
27867 else
27868 base = arg1, offset = arg2;
27870 gcc_assert (REG_P (base));
27872 /* Catch the case of <address> = <reg> + <reg> */
27873 if (REG_P (offset))
27875 int reg_offset = REGNO (offset);
27876 int reg_base = REGNO (base);
27877 int reg_dest = REGNO (operands[0]);
27879 /* Add the base and offset registers together into the
27880 higher destination register. */
27881 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27882 reg_dest + 1, reg_base, reg_offset);
27884 /* Load the lower destination register from the address in
27885 the higher destination register. */
27886 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27887 reg_dest, reg_dest + 1);
27889 /* Load the higher destination register from its own address
27890 plus 4. */
27891 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27892 reg_dest + 1, reg_dest + 1);
27894 else
27896 /* Compute <address> + 4 for the high order load. */
27897 operands[2] = adjust_address (operands[1], SImode, 4);
27899 /* If the computed address is held in the low order register
27900 then load the high order register first, otherwise always
27901 load the low order register first. */
27902 if (REGNO (operands[0]) == REGNO (base))
27904 output_asm_insn ("ldr\t%H0, %2", operands);
27905 output_asm_insn ("ldr\t%0, %1", operands);
27907 else
27909 output_asm_insn ("ldr\t%0, %1", operands);
27910 output_asm_insn ("ldr\t%H0, %2", operands);
27913 break;
27915 case LABEL_REF:
27916 /* With no registers to worry about we can just load the value
27917 directly. */
27918 operands[2] = adjust_address (operands[1], SImode, 4);
27920 output_asm_insn ("ldr\t%H0, %2", operands);
27921 output_asm_insn ("ldr\t%0, %1", operands);
27922 break;
27924 default:
27925 gcc_unreachable ();
27928 return "";
27931 const char *
27932 thumb_output_move_mem_multiple (int n, rtx *operands)
27934 rtx tmp;
27936 switch (n)
27938 case 2:
27939 if (REGNO (operands[4]) > REGNO (operands[5]))
27941 tmp = operands[4];
27942 operands[4] = operands[5];
27943 operands[5] = tmp;
27945 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27946 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27947 break;
27949 case 3:
27950 if (REGNO (operands[4]) > REGNO (operands[5]))
27952 tmp = operands[4];
27953 operands[4] = operands[5];
27954 operands[5] = tmp;
27956 if (REGNO (operands[5]) > REGNO (operands[6]))
27958 tmp = operands[5];
27959 operands[5] = operands[6];
27960 operands[6] = tmp;
27962 if (REGNO (operands[4]) > REGNO (operands[5]))
27964 tmp = operands[4];
27965 operands[4] = operands[5];
27966 operands[5] = tmp;
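/* The swaps above sort operands[4]-operands[6] into ascending register
   order, matching the order in which ldmia/stmia transfer registers.  */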
27969 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27970 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27971 break;
27973 default:
27974 gcc_unreachable ();
27977 return "";
27980 /* Output a call-via instruction for thumb state. */
27981 const char *
27982 thumb_call_via_reg (rtx reg)
27984 int regno = REGNO (reg);
27985 rtx *labelp;
27987 gcc_assert (regno < LR_REGNUM);
27989 /* If we are in the normal text section we can use a single instance
27990 per compilation unit. If we are doing function sections, then we need
27991 an entry per section, since we can't rely on reachability. */
27992 if (in_section == text_section)
27994 thumb_call_reg_needed = 1;
27996 if (thumb_call_via_label[regno] == NULL)
27997 thumb_call_via_label[regno] = gen_label_rtx ();
27998 labelp = thumb_call_via_label + regno;
28000 else
28002 if (cfun->machine->call_via[regno] == NULL)
28003 cfun->machine->call_via[regno] = gen_label_rtx ();
28004 labelp = cfun->machine->call_via + regno;
28007 output_asm_insn ("bl\t%a0", labelp);
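/* The label branched to here is emitted elsewhere (by arm_file_end for the
   text-section case) as a "bx" through the corresponding register.  */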
28008 return "";
28011 /* Routines for generating rtl. */
28012 void
28013 thumb_expand_movmemqi (rtx *operands)
28015 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28016 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28017 HOST_WIDE_INT len = INTVAL (operands[2]);
28018 HOST_WIDE_INT offset = 0;
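/* Copy in decreasing chunk sizes: 12-byte and 8-byte blocks first, then a
   word, halfword and byte tail as needed.  */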
28020 while (len >= 12)
28022 emit_insn (gen_movmem12b (out, in, out, in));
28023 len -= 12;
28026 if (len >= 8)
28028 emit_insn (gen_movmem8b (out, in, out, in));
28029 len -= 8;
28032 if (len >= 4)
28034 rtx reg = gen_reg_rtx (SImode);
28035 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28036 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28037 len -= 4;
28038 offset += 4;
28041 if (len >= 2)
28043 rtx reg = gen_reg_rtx (HImode);
28044 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28045 plus_constant (Pmode, in,
28046 offset))));
28047 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28048 offset)),
28049 reg));
28050 len -= 2;
28051 offset += 2;
28054 if (len)
28056 rtx reg = gen_reg_rtx (QImode);
28057 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28058 plus_constant (Pmode, in,
28059 offset))));
28060 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28061 offset)),
28062 reg));
28066 void
28067 thumb_reload_out_hi (rtx *operands)
28069 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28072 /* Handle reading a half-word from memory during reload. */
28073 void
28074 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28076 gcc_unreachable ();
28079 /* Return the length of a function name prefix
28080 that starts with the character 'c'. */
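/* ARM_NAME_ENCODING_LENGTHS expands (on targets that define it) to a set
   of case labels, each returning the length of one recognised
   name-encoding prefix.  */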
28081 static int
28082 arm_get_strip_length (int c)
28084 switch (c)
28086 ARM_NAME_ENCODING_LENGTHS
28087 default: return 0;
28091 /* Return a pointer to a function's name with any
28092 and all prefix encodings stripped from it. */
28093 const char *
28094 arm_strip_name_encoding (const char *name)
28096 int skip;
28098 while ((skip = arm_get_strip_length (* name)))
28099 name += skip;
28101 return name;
28104 /* If there is a '*' anywhere in the name's prefix, then
28105 emit the stripped name verbatim, otherwise prepend an
28106 underscore if leading underscores are being used. */
28107 void
28108 arm_asm_output_labelref (FILE *stream, const char *name)
28110 int skip;
28111 int verbatim = 0;
28113 while ((skip = arm_get_strip_length (* name)))
28115 verbatim |= (*name == '*');
28116 name += skip;
28119 if (verbatim)
28120 fputs (name, stream);
28121 else
28122 asm_fprintf (stream, "%U%s", name);
28125 /* This function is used to emit an EABI tag and its associated value.
28126 We emit the numerical value of the tag in case the assembler does not
28127 support textual tags. (E.g. gas prior to 2.20.) If requested we include
28128 the tag name in a comment so that anyone reading the assembler output
28129 will know which tag is being set.
28131 This function is not static because arm-c.c needs it too. */
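/* For example, with -fverbose-asm the output typically looks like:
       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals  */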
28133 void
28134 arm_emit_eabi_attribute (const char *name, int num, int val)
28136 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28137 if (flag_verbose_asm || flag_debug_asm)
28138 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28139 asm_fprintf (asm_out_file, "\n");
28142 static void
28143 arm_file_start (void)
28145 int val;
28147 if (TARGET_UNIFIED_ASM)
28148 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28150 if (TARGET_BPABI)
28152 const char *fpu_name;
28153 if (arm_selected_arch)
28155 /* armv7ve doesn't support any extensions. */
28156 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28158 /* Keep backward compatibility for assemblers
28159 which don't support armv7ve. */
28160 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28161 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28162 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28163 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28164 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28166 else
28168 const char* pos = strchr (arm_selected_arch->name, '+');
28169 if (pos)
28171 char buf[15];
28172 gcc_assert (strlen (arm_selected_arch->name)
28173 <= sizeof (buf) / sizeof (*pos));
28174 strncpy (buf, arm_selected_arch->name,
28175 (pos - arm_selected_arch->name) * sizeof (*pos));
28176 buf[pos - arm_selected_arch->name] = '\0';
28177 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28178 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28180 else
28181 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28184 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28185 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28186 else
28188 const char* truncated_name
28189 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28190 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28193 if (TARGET_SOFT_FLOAT)
28195 fpu_name = "softvfp";
28197 else
28199 fpu_name = arm_fpu_desc->name;
28200 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28202 if (TARGET_HARD_FLOAT)
28203 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28204 if (TARGET_HARD_FLOAT_ABI)
28205 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28208 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28210 /* Some of these attributes only apply when the corresponding features
28211 are used. However we don't have any easy way of figuring this out.
28212 Conservatively record the setting that would have been used. */
28214 if (flag_rounding_math)
28215 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28217 if (!flag_unsafe_math_optimizations)
28219 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28220 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28222 if (flag_signaling_nans)
28223 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28225 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28226 flag_finite_math_only ? 1 : 3);
28228 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28229 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28230 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28231 flag_short_enums ? 1 : 2);
28233 /* Tag_ABI_optimization_goals. */
28234 if (optimize_size)
28235 val = 4;
28236 else if (optimize >= 2)
28237 val = 2;
28238 else if (optimize)
28239 val = 1;
28240 else
28241 val = 6;
28242 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
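/* The values follow the EABI Tag_ABI_optimization_goals encoding; roughly,
   1 and 2 favour speed (2 aggressively), 4 favours size aggressively, and
   6 favours the debugging experience.  */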
28244 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28245 unaligned_access);
28247 if (arm_fp16_format)
28248 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28249 (int) arm_fp16_format);
28251 if (arm_lang_output_object_attributes_hook)
28252 arm_lang_output_object_attributes_hook();
28255 default_file_start ();
28258 static void
28259 arm_file_end (void)
28261 int regno;
28263 if (NEED_INDICATE_EXEC_STACK)
28264 /* Add .note.GNU-stack. */
28265 file_end_indicate_exec_stack ();
28267 if (! thumb_call_reg_needed)
28268 return;
28270 switch_to_section (text_section);
28271 asm_fprintf (asm_out_file, "\t.code 16\n");
28272 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28274 for (regno = 0; regno < LR_REGNUM; regno++)
28276 rtx label = thumb_call_via_label[regno];
28278 if (label != 0)
28280 targetm.asm_out.internal_label (asm_out_file, "L",
28281 CODE_LABEL_NUMBER (label));
28282 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28287 #ifndef ARM_PE
28288 /* Symbols in the text segment can be accessed without indirecting via the
28289 constant pool; it may take an extra binary operation, but this is still
28290 faster than indirecting via memory. Don't do this when not optimizing,
28291 since we won't be calculating all of the offsets necessary to do this
28292 simplification. */
28294 static void
28295 arm_encode_section_info (tree decl, rtx rtl, int first)
28297 if (optimize > 0 && TREE_CONSTANT (decl))
28298 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28300 default_encode_section_info (decl, rtl, first);
28302 #endif /* !ARM_PE */
28304 static void
28305 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28307 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28308 && !strcmp (prefix, "L"))
28310 arm_ccfsm_state = 0;
28311 arm_target_insn = NULL;
28313 default_internal_label (stream, prefix, labelno);
28316 /* Output code to add DELTA to the first argument, and then jump
28317 to FUNCTION. Used for C++ multiple inheritance. */
28318 static void
28319 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28320 HOST_WIDE_INT delta,
28321 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28322 tree function)
28324 static int thunk_label = 0;
28325 char label[256];
28326 char labelpc[256];
28327 int mi_delta = delta;
28328 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28329 int shift = 0;
28330 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28331 ? 1 : 0);
28332 if (mi_delta < 0)
28333 mi_delta = - mi_delta;
28335 final_start_function (emit_barrier (), file, 1);
28337 if (TARGET_THUMB1)
28339 int labelno = thunk_label++;
28340 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28341 /* Thunks are entered in ARM mode when available. */
28342 if (TARGET_THUMB1_ONLY)
28344 /* push r3 so we can use it as a temporary. */
28345 /* TODO: Omit this save if r3 is not used. */
28346 fputs ("\tpush {r3}\n", file);
28347 fputs ("\tldr\tr3, ", file);
28349 else
28351 fputs ("\tldr\tr12, ", file);
28353 assemble_name (file, label);
28354 fputc ('\n', file);
28355 if (flag_pic)
28357 /* If we are generating PIC, the ldr instruction below loads
28358 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28359 the address of the add + 8, so we have:
28361 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28362 = target + 1.
28364 Note that we have "+ 1" because some versions of GNU ld
28365 don't set the low bit of the result for R_ARM_REL32
28366 relocations against thumb function symbols.
28367 On ARMv6M this is +4, not +8. */
28368 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28369 assemble_name (file, labelpc);
28370 fputs (":\n", file);
28371 if (TARGET_THUMB1_ONLY)
28373 /* This is 2 insns after the start of the thunk, so we know it
28374 is 4-byte aligned. */
28375 fputs ("\tadd\tr3, pc, r3\n", file);
28376 fputs ("\tmov r12, r3\n", file);
28378 else
28379 fputs ("\tadd\tr12, pc, r12\n", file);
28381 else if (TARGET_THUMB1_ONLY)
28382 fputs ("\tmov r12, r3\n", file);
28384 if (TARGET_THUMB1_ONLY)
28386 if (mi_delta > 255)
28388 fputs ("\tldr\tr3, ", file);
28389 assemble_name (file, label);
28390 fputs ("+4\n", file);
28391 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28392 mi_op, this_regno, this_regno);
28394 else if (mi_delta != 0)
28396 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28397 mi_op, this_regno, this_regno,
28398 mi_delta);
28401 else
28403 /* TODO: Use movw/movt for large constants when available. */
28404 while (mi_delta != 0)
28406 if ((mi_delta & (3 << shift)) == 0)
28407 shift += 2;
28408 else
28410 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28411 mi_op, this_regno, this_regno,
28412 mi_delta & (0xff << shift));
28413 mi_delta &= ~(0xff << shift);
28414 shift += 8;
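/* The loop above splits MI_DELTA into 8-bit chunks at even bit positions
   (the form an ARM data-processing immediate can encode) and emits one
   add or sub per chunk.  */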
28418 if (TARGET_THUMB1)
28420 if (TARGET_THUMB1_ONLY)
28421 fputs ("\tpop\t{r3}\n", file);
28423 fprintf (file, "\tbx\tr12\n");
28424 ASM_OUTPUT_ALIGN (file, 2);
28425 assemble_name (file, label);
28426 fputs (":\n", file);
28427 if (flag_pic)
28429 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28430 rtx tem = XEXP (DECL_RTL (function), 0);
28431 tem = plus_constant (GET_MODE (tem), tem, -7);
28432 tem = gen_rtx_MINUS (GET_MODE (tem),
28433 tem,
28434 gen_rtx_SYMBOL_REF (Pmode,
28435 ggc_strdup (labelpc)));
28436 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28438 else
28439 /* Output ".word .LTHUNKn". */
28440 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28442 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28443 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28445 else
28447 fputs ("\tb\t", file);
28448 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28449 if (NEED_PLT_RELOC)
28450 fputs ("(PLT)", file);
28451 fputc ('\n', file);
28454 final_end_function ();
28458 arm_emit_vector_const (FILE *file, rtx x)
28460 int i;
28461 const char * pattern;
28463 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28465 switch (GET_MODE (x))
28467 case V2SImode: pattern = "%08x"; break;
28468 case V4HImode: pattern = "%04x"; break;
28469 case V8QImode: pattern = "%02x"; break;
28470 default: gcc_unreachable ();
28473 fprintf (file, "0x");
28474 for (i = CONST_VECTOR_NUNITS (x); i--;)
28476 rtx element;
28478 element = CONST_VECTOR_ELT (x, i);
28479 fprintf (file, pattern, INTVAL (element));
28482 return 1;
28485 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28486 HFmode constant pool entries are actually loaded with ldr. */
28487 void
28488 arm_emit_fp16_const (rtx c)
28490 REAL_VALUE_TYPE r;
28491 long bits;
28493 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28494 bits = real_to_target (NULL, &r, HFmode);
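/* Two bytes of zero padding keep the entry word-sized; they precede the
   value on big-endian targets and follow it otherwise.  */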
28495 if (WORDS_BIG_ENDIAN)
28496 assemble_zeros (2);
28497 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28498 if (!WORDS_BIG_ENDIAN)
28499 assemble_zeros (2);
28502 const char *
28503 arm_output_load_gr (rtx *operands)
28505 rtx reg;
28506 rtx offset;
28507 rtx wcgr;
28508 rtx sum;
28510 if (!MEM_P (operands [1])
28511 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28512 || !REG_P (reg = XEXP (sum, 0))
28513 || !CONST_INT_P (offset = XEXP (sum, 1))
28514 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28515 return "wldrw%?\t%0, %1";
28517 /* Fix up an out-of-range load of a GR register. */
28518 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28519 wcgr = operands[0];
28520 operands[0] = reg;
28521 output_asm_insn ("ldr%?\t%0, %1", operands);
28523 operands[0] = wcgr;
28524 operands[1] = reg;
28525 output_asm_insn ("tmcr%?\t%0, %1", operands);
28526 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28528 return "";
28531 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28533 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28534 named arg and all anonymous args onto the stack.
28535 XXX I know the prologue shouldn't be pushing registers, but it is faster
28536 that way. */
28538 static void
28539 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28540 enum machine_mode mode,
28541 tree type,
28542 int *pretend_size,
28543 int second_time ATTRIBUTE_UNUSED)
28545 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28546 int nregs;
28548 cfun->machine->uses_anonymous_args = 1;
28549 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28551 nregs = pcum->aapcs_ncrn;
28552 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28553 nregs++;
28555 else
28556 nregs = pcum->nregs;
28558 if (nregs < NUM_ARG_REGS)
28559 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28562 /* We can't rely on the caller doing the proper promotion when
28563 using APCS or ATPCS. */
28565 static bool
28566 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28568 return !TARGET_AAPCS_BASED;
28571 static enum machine_mode
28572 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28573 enum machine_mode mode,
28574 int *punsignedp ATTRIBUTE_UNUSED,
28575 const_tree fntype ATTRIBUTE_UNUSED,
28576 int for_return ATTRIBUTE_UNUSED)
28578 if (GET_MODE_CLASS (mode) == MODE_INT
28579 && GET_MODE_SIZE (mode) < 4)
28580 return SImode;
28582 return mode;
28585 /* AAPCS based ABIs use short enums by default. */
28587 static bool
28588 arm_default_short_enums (void)
28590 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28594 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28596 static bool
28597 arm_align_anon_bitfield (void)
28599 return TARGET_AAPCS_BASED;
28603 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28605 static tree
28606 arm_cxx_guard_type (void)
28608 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28612 /* The EABI says test the least significant bit of a guard variable. */
28614 static bool
28615 arm_cxx_guard_mask_bit (void)
28617 return TARGET_AAPCS_BASED;
28621 /* The EABI specifies that all array cookies are 8 bytes long. */
28623 static tree
28624 arm_get_cookie_size (tree type)
28626 tree size;
28628 if (!TARGET_AAPCS_BASED)
28629 return default_cxx_get_cookie_size (type);
28631 size = build_int_cst (sizetype, 8);
28632 return size;
28636 /* The EABI says that array cookies should also contain the element size. */
28638 static bool
28639 arm_cookie_has_size (void)
28641 return TARGET_AAPCS_BASED;
28645 /* The EABI says constructors and destructors should return a pointer to
28646 the object constructed/destroyed. */
28648 static bool
28649 arm_cxx_cdtor_returns_this (void)
28651 return TARGET_AAPCS_BASED;
28654 /* The EABI says that an inline function may never be the key
28655 method. */
28657 static bool
28658 arm_cxx_key_method_may_be_inline (void)
28660 return !TARGET_AAPCS_BASED;
28663 static void
28664 arm_cxx_determine_class_data_visibility (tree decl)
28666 if (!TARGET_AAPCS_BASED
28667 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28668 return;
28670 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28671 is exported. However, on systems without dynamic vague linkage,
28672 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28673 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28674 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28675 else
28676 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28677 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28680 static bool
28681 arm_cxx_class_data_always_comdat (void)
28683 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28684 vague linkage if the class has no key function. */
28685 return !TARGET_AAPCS_BASED;
28689 /* The EABI says __aeabi_atexit should be used to register static
28690 destructors. */
28692 static bool
28693 arm_cxx_use_aeabi_atexit (void)
28695 return TARGET_AAPCS_BASED;
28699 void
28700 arm_set_return_address (rtx source, rtx scratch)
28702 arm_stack_offsets *offsets;
28703 HOST_WIDE_INT delta;
28704 rtx addr;
28705 unsigned long saved_regs;
28707 offsets = arm_get_frame_offsets ();
28708 saved_regs = offsets->saved_regs_mask;
28710 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28711 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28712 else
28714 if (frame_pointer_needed)
28715 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28716 else
28718 /* LR will be the first saved register. */
28719 delta = offsets->outgoing_args - (offsets->frame + 4);
28722 if (delta >= 4096)
28724 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28725 GEN_INT (delta & ~4095)));
28726 addr = scratch;
28727 delta &= 4095;
28729 else
28730 addr = stack_pointer_rtx;
28732 addr = plus_constant (Pmode, addr, delta);
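/* Deltas of 4096 or more cannot be encoded as a single store offset, so
   the upper bits are added into the scratch register above and only the
   low 12 bits remain in the offset.  */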
28734 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28739 void
28740 thumb_set_return_address (rtx source, rtx scratch)
28742 arm_stack_offsets *offsets;
28743 HOST_WIDE_INT delta;
28744 HOST_WIDE_INT limit;
28745 int reg;
28746 rtx addr;
28747 unsigned long mask;
28749 emit_use (source);
28751 offsets = arm_get_frame_offsets ();
28752 mask = offsets->saved_regs_mask;
28753 if (mask & (1 << LR_REGNUM))
28755 limit = 1024;
28756 /* Find the saved regs. */
28757 if (frame_pointer_needed)
28759 delta = offsets->soft_frame - offsets->saved_args;
28760 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28761 if (TARGET_THUMB1)
28762 limit = 128;
28764 else
28766 delta = offsets->outgoing_args - offsets->saved_args;
28767 reg = SP_REGNUM;
28769 /* Allow for the stack frame. */
28770 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28771 delta -= 16;
28772 /* The link register is always the first saved register. */
28773 delta -= 4;
28775 /* Construct the address. */
28776 addr = gen_rtx_REG (SImode, reg);
28777 if (delta > limit)
28779 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28780 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28781 addr = scratch;
28783 else
28784 addr = plus_constant (Pmode, addr, delta);
28786 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28788 else
28789 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28792 /* Implements target hook vector_mode_supported_p. */
28793 bool
28794 arm_vector_mode_supported_p (enum machine_mode mode)
28796 /* Neon also supports V2SImode, etc. listed in the clause below. */
28797 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28798 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28799 return true;
28801 if ((TARGET_NEON || TARGET_IWMMXT)
28802 && ((mode == V2SImode)
28803 || (mode == V4HImode)
28804 || (mode == V8QImode)))
28805 return true;
28807 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28808 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28809 || mode == V2HAmode))
28810 return true;
28812 return false;
28815 /* Implements target hook array_mode_supported_p. */
28817 static bool
28818 arm_array_mode_supported_p (enum machine_mode mode,
28819 unsigned HOST_WIDE_INT nelems)
28821 if (TARGET_NEON
28822 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28823 && (nelems >= 2 && nelems <= 4))
28824 return true;
28826 return false;
28829 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28830 registers when autovectorizing for Neon, at least until multiple vector
28831 widths are supported properly by the middle-end. */
28833 static enum machine_mode
28834 arm_preferred_simd_mode (enum machine_mode mode)
28836 if (TARGET_NEON)
28837 switch (mode)
28839 case SFmode:
28840 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28841 case SImode:
28842 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28843 case HImode:
28844 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28845 case QImode:
28846 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28847 case DImode:
28848 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28849 return V2DImode;
28850 break;
28852 default:;
28855 if (TARGET_REALLY_IWMMXT)
28856 switch (mode)
28858 case SImode:
28859 return V2SImode;
28860 case HImode:
28861 return V4HImode;
28862 case QImode:
28863 return V8QImode;
28865 default:;
28868 return word_mode;
28871 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28873 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28874 using r0-r4 for function arguments, r7 for the stack frame and not having
28875 enough left over to do doubleword arithmetic. For Thumb-2 all the
28876 potentially problematic instructions accept high registers so this is not
28877 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28878 that require many low registers. */
28879 static bool
28880 arm_class_likely_spilled_p (reg_class_t rclass)
28882 if ((TARGET_THUMB1 && rclass == LO_REGS)
28883 || rclass == CC_REG)
28884 return true;
28886 return false;
28889 /* Implements target hook small_register_classes_for_mode_p. */
28890 bool
28891 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28893 return TARGET_THUMB1;
28896 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28897 ARM insns and therefore guarantee that the shift count is modulo 256.
28898 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28899 guarantee no particular behavior for out-of-range counts. */
28901 static unsigned HOST_WIDE_INT
28902 arm_shift_truncation_mask (enum machine_mode mode)
28904 return mode == SImode ? 255 : 0;
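/* Illustrative consequence: for SImode the mask of 255 tells the middle-end
   that
     x << (n & 255)   and   x << n
   behave identically, so an explicit AND of the shift count can be dropped;
   for DImode the mask of 0 promises nothing about out-of-range counts.  */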
28908 /* Map internal gcc register numbers to DWARF2 register numbers. */
28910 unsigned int
28911 arm_dbx_register_number (unsigned int regno)
28913 if (regno < 16)
28914 return regno;
28916 if (IS_VFP_REGNUM (regno))
28918 /* See comment in arm_dwarf_register_span. */
28919 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28920 return 64 + regno - FIRST_VFP_REGNUM;
28921 else
28922 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28925 if (IS_IWMMXT_GR_REGNUM (regno))
28926 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28928 if (IS_IWMMXT_REGNUM (regno))
28929 return 112 + regno - FIRST_IWMMXT_REGNUM;
28931 gcc_unreachable ();
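/* Worked example (illustrative): a core register such as r5 keeps its own
   number (5); a register in the single-precision-capable VFP bank maps into
   the legacy 64..95 range (S0 -> 64), while the remaining double-precision
   registers map into 256..287 (e.g. D16 -> 272).  */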
28934 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28935 GCC models them as 64 32-bit registers, so we need to describe this to
28936 the DWARF generation code. Other registers can use the default. */
28937 static rtx
28938 arm_dwarf_register_span (rtx rtl)
28940 enum machine_mode mode;
28941 unsigned regno;
28942 rtx parts[16];
28943 int nregs;
28944 int i;
28946 regno = REGNO (rtl);
28947 if (!IS_VFP_REGNUM (regno))
28948 return NULL_RTX;
28950 /* XXX FIXME: The EABI defines two VFP register ranges:
28951 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28952 256-287: D0-D31
28953 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28954 corresponding D register. Until GDB supports this, we shall use the
28955 legacy encodings. We also use these encodings for D0-D15 for
28956 compatibility with older debuggers. */
28957 mode = GET_MODE (rtl);
28958 if (GET_MODE_SIZE (mode) < 8)
28959 return NULL_RTX;
28961 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28963 nregs = GET_MODE_SIZE (mode) / 4;
28964 for (i = 0; i < nregs; i += 2)
28965 if (TARGET_BIG_END)
28967 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28968 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28970 else
28972 parts[i] = gen_rtx_REG (SImode, regno + i);
28973 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28976 else
28978 nregs = GET_MODE_SIZE (mode) / 8;
28979 for (i = 0; i < nregs; i++)
28980 parts[i] = gen_rtx_REG (DImode, regno + i);
28983 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
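/* Illustrative example: a DFmode value held in a single-precision-capable
   D register is described to DWARF as two consecutive SImode pieces (its two
   constituent S registers), with the pair swapped for TARGET_BIG_END; D
   registers outside that bank are described as whole DImode registers.  */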
28986 #if ARM_UNWIND_INFO
28987 /* Emit unwind directives for a store-multiple instruction or stack pointer
28988 push during alignment.
28989 These should only ever be generated by the function prologue code, so
28990 expect them to have a particular form.
28991 The store-multiple instruction sometimes pushes pc as the last register,
28992 although it should not be tracked into unwind information, or for -Os
28993 sometimes pushes some dummy registers before the first register that needs
28994 to be tracked in unwind information; such dummy registers are there just
28995 to avoid separate stack adjustment, and will not be restored in the
28996 epilogue. */
28998 static void
28999 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29001 int i;
29002 HOST_WIDE_INT offset;
29003 HOST_WIDE_INT nregs;
29004 int reg_size;
29005 unsigned reg;
29006 unsigned lastreg;
29007 unsigned padfirst = 0, padlast = 0;
29008 rtx e;
29010 e = XVECEXP (p, 0, 0);
29011 gcc_assert (GET_CODE (e) == SET);
29013 /* First insn will adjust the stack pointer. */
29014 gcc_assert (GET_CODE (e) == SET
29015 && REG_P (SET_DEST (e))
29016 && REGNO (SET_DEST (e)) == SP_REGNUM
29017 && GET_CODE (SET_SRC (e)) == PLUS);
29019 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29020 nregs = XVECLEN (p, 0) - 1;
29021 gcc_assert (nregs);
29023 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29024 if (reg < 16)
29026 /* For -Os dummy registers can be pushed at the beginning to
29027 avoid separate stack pointer adjustment. */
29028 e = XVECEXP (p, 0, 1);
29029 e = XEXP (SET_DEST (e), 0);
29030 if (GET_CODE (e) == PLUS)
29031 padfirst = INTVAL (XEXP (e, 1));
29032 gcc_assert (padfirst == 0 || optimize_size);
29033 /* The function prologue may also push pc, but not annotate it as it is
29034 never restored. We turn this into a stack pointer adjustment. */
29035 e = XVECEXP (p, 0, nregs);
29036 e = XEXP (SET_DEST (e), 0);
29037 if (GET_CODE (e) == PLUS)
29038 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29039 else
29040 padlast = offset - 4;
29041 gcc_assert (padlast == 0 || padlast == 4);
29042 if (padlast == 4)
29043 fprintf (asm_out_file, "\t.pad #4\n");
29044 reg_size = 4;
29045 fprintf (asm_out_file, "\t.save {");
29047 else if (IS_VFP_REGNUM (reg))
29049 reg_size = 8;
29050 fprintf (asm_out_file, "\t.vsave {");
29052 else
29053 /* Unknown register type. */
29054 gcc_unreachable ();
29056 /* If the stack increment doesn't match the size of the saved registers,
29057 something has gone horribly wrong. */
29058 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29060 offset = padfirst;
29061 lastreg = 0;
29062 /* The remaining insns will describe the stores. */
29063 for (i = 1; i <= nregs; i++)
29065 /* Expect (set (mem <addr>) (reg)).
29066 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29067 e = XVECEXP (p, 0, i);
29068 gcc_assert (GET_CODE (e) == SET
29069 && MEM_P (SET_DEST (e))
29070 && REG_P (SET_SRC (e)));
29072 reg = REGNO (SET_SRC (e));
29073 gcc_assert (reg >= lastreg);
29075 if (i != 1)
29076 fprintf (asm_out_file, ", ");
29077 /* We can't use %r for vfp because we need to use the
29078 double precision register names. */
29079 if (IS_VFP_REGNUM (reg))
29080 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29081 else
29082 asm_fprintf (asm_out_file, "%r", reg);
29084 #ifdef ENABLE_CHECKING
29085 /* Check that the addresses are consecutive. */
29086 e = XEXP (SET_DEST (e), 0);
29087 if (GET_CODE (e) == PLUS)
29088 gcc_assert (REG_P (XEXP (e, 0))
29089 && REGNO (XEXP (e, 0)) == SP_REGNUM
29090 && CONST_INT_P (XEXP (e, 1))
29091 && offset == INTVAL (XEXP (e, 1)));
29092 else
29093 gcc_assert (i == 1
29094 && REG_P (e)
29095 && REGNO (e) == SP_REGNUM);
29096 offset += reg_size;
29097 #endif
29099 fprintf (asm_out_file, "}\n");
29100 if (padfirst)
29101 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29104 /* Emit unwind directives for a SET. */
29106 static void
29107 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29109 rtx e0;
29110 rtx e1;
29111 unsigned reg;
29113 e0 = XEXP (p, 0);
29114 e1 = XEXP (p, 1);
29115 switch (GET_CODE (e0))
29117 case MEM:
29118 /* Pushing a single register. */
29119 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29120 || !REG_P (XEXP (XEXP (e0, 0), 0))
29121 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29122 abort ();
29124 asm_fprintf (asm_out_file, "\t.save ");
29125 if (IS_VFP_REGNUM (REGNO (e1)))
29126 asm_fprintf(asm_out_file, "{d%d}\n",
29127 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29128 else
29129 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29130 break;
29132 case REG:
29133 if (REGNO (e0) == SP_REGNUM)
29135 /* A stack increment. */
29136 if (GET_CODE (e1) != PLUS
29137 || !REG_P (XEXP (e1, 0))
29138 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29139 || !CONST_INT_P (XEXP (e1, 1)))
29140 abort ();
29142 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29143 -INTVAL (XEXP (e1, 1)));
29145 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29147 HOST_WIDE_INT offset;
29149 if (GET_CODE (e1) == PLUS)
29151 if (!REG_P (XEXP (e1, 0))
29152 || !CONST_INT_P (XEXP (e1, 1)))
29153 abort ();
29154 reg = REGNO (XEXP (e1, 0));
29155 offset = INTVAL (XEXP (e1, 1));
29156 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29157 HARD_FRAME_POINTER_REGNUM, reg,
29158 offset);
29160 else if (REG_P (e1))
29162 reg = REGNO (e1);
29163 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29164 HARD_FRAME_POINTER_REGNUM, reg);
29166 else
29167 abort ();
29169 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29171 /* Move from sp to reg. */
29172 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29174 else if (GET_CODE (e1) == PLUS
29175 && REG_P (XEXP (e1, 0))
29176 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29177 && CONST_INT_P (XEXP (e1, 1)))
29179 /* Set reg to offset from sp. */
29180 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29181 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29183 else
29184 abort ();
29185 break;
29187 default:
29188 abort ();
29193 /* Emit unwind directives for the given insn. */
29195 static void
29196 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29198 rtx note, pat;
29199 bool handled_one = false;
29201 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29202 return;
29204 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29205 && (TREE_NOTHROW (current_function_decl)
29206 || crtl->all_throwers_are_sibcalls))
29207 return;
29209 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29210 return;
29212 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29214 switch (REG_NOTE_KIND (note))
29216 case REG_FRAME_RELATED_EXPR:
29217 pat = XEXP (note, 0);
29218 goto found;
29220 case REG_CFA_REGISTER:
29221 pat = XEXP (note, 0);
29222 if (pat == NULL)
29224 pat = PATTERN (insn);
29225 if (GET_CODE (pat) == PARALLEL)
29226 pat = XVECEXP (pat, 0, 0);
29229 /* Only emitted for IS_STACKALIGN re-alignment. */
29231 rtx dest, src;
29232 unsigned reg;
29234 src = SET_SRC (pat);
29235 dest = SET_DEST (pat);
29237 gcc_assert (src == stack_pointer_rtx);
29238 reg = REGNO (dest);
29239 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29240 reg + 0x90, reg);
29242 handled_one = true;
29243 break;
29245 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29246 to get correct dwarf information for shrink-wrap. We should not
29247 emit unwind information for it because these are used either for
29248 pretend arguments or notes to adjust sp and restore registers from
29249 the stack. */
29250 case REG_CFA_DEF_CFA:
29251 case REG_CFA_ADJUST_CFA:
29252 case REG_CFA_RESTORE:
29253 return;
29255 case REG_CFA_EXPRESSION:
29256 case REG_CFA_OFFSET:
29257 /* ??? Only handling here what we actually emit. */
29258 gcc_unreachable ();
29260 default:
29261 break;
29264 if (handled_one)
29265 return;
29266 pat = PATTERN (insn);
29267 found:
29269 switch (GET_CODE (pat))
29271 case SET:
29272 arm_unwind_emit_set (asm_out_file, pat);
29273 break;
29275 case SEQUENCE:
29276 /* Store multiple. */
29277 arm_unwind_emit_sequence (asm_out_file, pat);
29278 break;
29280 default:
29281 abort();
29286 /* Output a reference from a function exception table to the type_info
29287 object X. The EABI specifies that the symbol should be relocated by
29288 an R_ARM_TARGET2 relocation. */
29290 static bool
29291 arm_output_ttype (rtx x)
29293 fputs ("\t.word\t", asm_out_file);
29294 output_addr_const (asm_out_file, x);
29295 /* Use special relocations for symbol references. */
29296 if (!CONST_INT_P (x))
29297 fputs ("(TARGET2)", asm_out_file);
29298 fputc ('\n', asm_out_file);
29300 return TRUE;
29303 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29305 static void
29306 arm_asm_emit_except_personality (rtx personality)
29308 fputs ("\t.personality\t", asm_out_file);
29309 output_addr_const (asm_out_file, personality);
29310 fputc ('\n', asm_out_file);
29313 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29315 static void
29316 arm_asm_init_sections (void)
29318 exception_section = get_unnamed_section (0, output_section_asm_op,
29319 "\t.handlerdata");
29321 #endif /* ARM_UNWIND_INFO */
29323 /* Output unwind directives for the start/end of a function. */
29325 void
29326 arm_output_fn_unwind (FILE * f, bool prologue)
29328 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29329 return;
29331 if (prologue)
29332 fputs ("\t.fnstart\n", f);
29333 else
29335 /* If this function will never be unwound, then mark it as such.
29336 The same condition is used in arm_unwind_emit to suppress
29337 the frame annotations. */
29338 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29339 && (TREE_NOTHROW (current_function_decl)
29340 || crtl->all_throwers_are_sibcalls))
29341 fputs("\t.cantunwind\n", f);
29343 fputs ("\t.fnend\n", f);
29347 static bool
29348 arm_emit_tls_decoration (FILE *fp, rtx x)
29350 enum tls_reloc reloc;
29351 rtx val;
29353 val = XVECEXP (x, 0, 0);
29354 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29356 output_addr_const (fp, val);
29358 switch (reloc)
29360 case TLS_GD32:
29361 fputs ("(tlsgd)", fp);
29362 break;
29363 case TLS_LDM32:
29364 fputs ("(tlsldm)", fp);
29365 break;
29366 case TLS_LDO32:
29367 fputs ("(tlsldo)", fp);
29368 break;
29369 case TLS_IE32:
29370 fputs ("(gottpoff)", fp);
29371 break;
29372 case TLS_LE32:
29373 fputs ("(tpoff)", fp);
29374 break;
29375 case TLS_DESCSEQ:
29376 fputs ("(tlsdesc)", fp);
29377 break;
29378 default:
29379 gcc_unreachable ();
29382 switch (reloc)
29384 case TLS_GD32:
29385 case TLS_LDM32:
29386 case TLS_IE32:
29387 case TLS_DESCSEQ:
29388 fputs (" + (. - ", fp);
29389 output_addr_const (fp, XVECEXP (x, 0, 2));
29390 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
29391 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29392 output_addr_const (fp, XVECEXP (x, 0, 3));
29393 fputc (')', fp);
29394 break;
29395 default:
29396 break;
29399 return TRUE;
29402 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29404 static void
29405 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29407 gcc_assert (size == 4);
29408 fputs ("\t.word\t", file);
29409 output_addr_const (file, x);
29410 fputs ("(tlsldo)", file);
29413 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29415 static bool
29416 arm_output_addr_const_extra (FILE *fp, rtx x)
29418 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29419 return arm_emit_tls_decoration (fp, x);
29420 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29422 char label[256];
29423 int labelno = INTVAL (XVECEXP (x, 0, 0));
29425 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29426 assemble_name_raw (fp, label);
29428 return TRUE;
29430 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29432 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29433 if (GOT_PCREL)
29434 fputs ("+.", fp);
29435 fputs ("-(", fp);
29436 output_addr_const (fp, XVECEXP (x, 0, 0));
29437 fputc (')', fp);
29438 return TRUE;
29440 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29442 output_addr_const (fp, XVECEXP (x, 0, 0));
29443 if (GOT_PCREL)
29444 fputs ("+.", fp);
29445 fputs ("-(", fp);
29446 output_addr_const (fp, XVECEXP (x, 0, 1));
29447 fputc (')', fp);
29448 return TRUE;
29450 else if (GET_CODE (x) == CONST_VECTOR)
29451 return arm_emit_vector_const (fp, x);
29453 return FALSE;
29456 /* Output assembly for a shift instruction.
29457 SET_FLAGS determines how the instruction modifies the condition codes.
29458 0 - Do not set condition codes.
29459 1 - Set condition codes.
29460 2 - Use smallest instruction. */
29461 const char *
29462 arm_output_shift(rtx * operands, int set_flags)
29464 char pattern[100];
29465 static const char flag_chars[3] = {'?', '.', '!'};
29466 const char *shift;
29467 HOST_WIDE_INT val;
29468 char c;
29470 c = flag_chars[set_flags];
29471 if (TARGET_UNIFIED_ASM)
29473 shift = shift_op(operands[3], &val);
29474 if (shift)
29476 if (val != -1)
29477 operands[2] = GEN_INT(val);
29478 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29480 else
29481 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29483 else
29484 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29485 output_asm_insn (pattern, operands);
29486 return "";
29489 /* Output assembly for a WMMX immediate shift instruction. */
29490 const char *
29491 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29493 int shift = INTVAL (operands[2]);
29494 char templ[50];
29495 enum machine_mode opmode = GET_MODE (operands[0]);
29497 gcc_assert (shift >= 0);
29499 /* If the shift value in the register versions is > 63 (for the D qualifier),
29500 31 (for W) or 15 (for H), the result needs the special handling below. */
29501 if (((opmode == V4HImode) && (shift > 15))
29502 || ((opmode == V2SImode) && (shift > 31))
29503 || ((opmode == DImode) && (shift > 63)))
29505 if (wror_or_wsra)
29507 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29508 output_asm_insn (templ, operands);
29509 if (opmode == DImode)
29511 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29512 output_asm_insn (templ, operands);
29515 else
29517 /* The destination register will contain all zeros. */
29518 sprintf (templ, "wzero\t%%0");
29519 output_asm_insn (templ, operands);
29521 return "";
29524 if ((opmode == DImode) && (shift > 32))
29526 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29527 output_asm_insn (templ, operands);
29528 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29529 output_asm_insn (templ, operands);
29531 else
29533 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29534 output_asm_insn (templ, operands);
29536 return "";
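/* Illustrative example: a DImode shift by 40 cannot be encoded in a single
   immediate-shift instruction, so it is emitted as a shift by #32 followed by
   a second shift of the intermediate result by #8.  */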
29539 /* Output assembly for a WMMX tinsr instruction. */
29540 const char *
29541 arm_output_iwmmxt_tinsr (rtx *operands)
29543 int mask = INTVAL (operands[3]);
29544 int i;
29545 char templ[50];
29546 int units = mode_nunits[GET_MODE (operands[0])];
29547 gcc_assert ((mask & (mask - 1)) == 0);
29548 for (i = 0; i < units; ++i)
29550 if ((mask & 0x01) == 1)
29552 break;
29554 mask >>= 1;
29556 gcc_assert (i < units);
29558 switch (GET_MODE (operands[0]))
29560 case V8QImode:
29561 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29562 break;
29563 case V4HImode:
29564 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29565 break;
29566 case V2SImode:
29567 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29568 break;
29569 default:
29570 gcc_unreachable ();
29571 break;
29573 output_asm_insn (templ, operands);
29575 return "";
29578 /* Output a Thumb-1 casesi dispatch sequence. */
29579 const char *
29580 thumb1_output_casesi (rtx *operands)
29582 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29584 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29586 switch (GET_MODE(diff_vec))
29588 case QImode:
29589 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29590 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29591 case HImode:
29592 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29593 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29594 case SImode:
29595 return "bl\t%___gnu_thumb1_case_si";
29596 default:
29597 gcc_unreachable ();
29601 /* Output a Thumb-2 casesi instruction. */
29602 const char *
29603 thumb2_output_casesi (rtx *operands)
29605 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29607 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29609 output_asm_insn ("cmp\t%0, %1", operands);
29610 output_asm_insn ("bhi\t%l3", operands);
29611 switch (GET_MODE(diff_vec))
29613 case QImode:
29614 return "tbb\t[%|pc, %0]";
29615 case HImode:
29616 return "tbh\t[%|pc, %0, lsl #1]";
29617 case SImode:
29618 if (flag_pic)
29620 output_asm_insn ("adr\t%4, %l2", operands);
29621 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29622 output_asm_insn ("add\t%4, %4, %5", operands);
29623 return "bx\t%4";
29625 else
29627 output_asm_insn ("adr\t%4, %l2", operands);
29628 return "ldr\t%|pc, [%4, %0, lsl #2]";
29630 default:
29631 gcc_unreachable ();
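/* Illustrative example: for an HImode dispatch table this emits, roughly,

     cmp  index, bound
     bhi  default_label
     tbh  [pc, index, lsl #1]

   where index, bound and default_label stand for operands 0, 1 and 3.  */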
29635 /* Most ARM cores are single issue, but some newer ones can issue two or more instructions per cycle.
29636 The scheduler descriptions rely on this being correct. */
29637 static int
29638 arm_issue_rate (void)
29640 switch (arm_tune)
29642 case cortexa15:
29643 case cortexa57:
29644 return 3;
29646 case cortexr4:
29647 case cortexr4f:
29648 case cortexr5:
29649 case genericv7a:
29650 case cortexa5:
29651 case cortexa7:
29652 case cortexa8:
29653 case cortexa9:
29654 case cortexa12:
29655 case cortexa53:
29656 case fa726te:
29657 case marvell_pj4:
29658 return 2;
29660 default:
29661 return 1;
29665 /* A table and a function to perform ARM-specific name mangling for
29666 NEON vector types in order to conform to the AAPCS (see "Procedure
29667 Call Standard for the ARM Architecture", Appendix A). To qualify
29668 for emission with the mangled names defined in that document, a
29669 vector type must not only be of the correct mode but also be
29670 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29671 typedef struct
29673 enum machine_mode mode;
29674 const char *element_type_name;
29675 const char *aapcs_name;
29676 } arm_mangle_map_entry;
29678 static arm_mangle_map_entry arm_mangle_map[] = {
29679 /* 64-bit containerized types. */
29680 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29681 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29682 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29683 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29684 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29685 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29686 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29687 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29688 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29689 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29691 /* 128-bit containerized types. */
29692 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29693 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29694 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29695 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29696 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29697 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29698 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29699 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29700 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29701 { VOIDmode, NULL, NULL }
29704 const char *
29705 arm_mangle_type (const_tree type)
29707 arm_mangle_map_entry *pos = arm_mangle_map;
29709 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29710 has to be mangled as if it were in the "std" namespace. */
29711 if (TARGET_AAPCS_BASED
29712 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29713 return "St9__va_list";
29715 /* Half-precision float. */
29716 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29717 return "Dh";
29719 if (TREE_CODE (type) != VECTOR_TYPE)
29720 return NULL;
29722 /* Check the mode of the vector type, and the name of the vector
29723 element type, against the table. */
29724 while (pos->mode != VOIDmode)
29726 tree elt_type = TREE_TYPE (type);
29728 if (pos->mode == TYPE_MODE (type)
29729 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29730 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29731 pos->element_type_name))
29732 return pos->aapcs_name;
29734 pos++;
29737 /* Use the default mangling for unrecognized (possibly user-defined)
29738 vector types. */
29739 return NULL;
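/* Worked example (illustrative, assuming the usual Itanium mangling for the
   rest of the name): a 64-bit vector of __builtin_neon_qi elements (V8QImode,
   the int8x8_t container) maps to "15__simd64_int8_t" above, so
   "void foo (int8x8_t)" would mangle as _Z3foo15__simd64_int8_t.  */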
29742 /* Order of allocation of core registers for Thumb: this allocation is
29743 written over the corresponding initial entries of the array
29744 initialized with REG_ALLOC_ORDER. We allocate all low registers
29745 first. Saving and restoring a low register is usually cheaper than
29746 using a call-clobbered high register. */
29748 static const int thumb_core_reg_alloc_order[] =
29750 3, 2, 1, 0, 4, 5, 6, 7,
29751 14, 12, 8, 9, 10, 11
29754 /* Adjust register allocation order when compiling for Thumb. */
29756 void
29757 arm_order_regs_for_local_alloc (void)
29759 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29760 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29761 if (TARGET_THUMB)
29762 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29763 sizeof (thumb_core_reg_alloc_order));
29766 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29768 bool
29769 arm_frame_pointer_required (void)
29771 return (cfun->has_nonlocal_label
29772 || SUBTARGET_FRAME_POINTER_REQUIRED
29773 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29776 /* Only Thumb-1 lacks support for conditional execution, so return true if
29777 the target is not Thumb-1. */
29778 static bool
29779 arm_have_conditional_execution (void)
29781 return !TARGET_THUMB1;
29784 tree
29785 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29787 enum machine_mode in_mode, out_mode;
29788 int in_n, out_n;
29790 if (TREE_CODE (type_out) != VECTOR_TYPE
29791 || TREE_CODE (type_in) != VECTOR_TYPE
29792 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29793 return NULL_TREE;
29795 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29796 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29797 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29798 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29800 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29801 decl of the vectorized builtin for the appropriate vector mode.
29802 NULL_TREE is returned if no such builtin is available. */
29803 #undef ARM_CHECK_BUILTIN_MODE
29804 #define ARM_CHECK_BUILTIN_MODE(C) \
29805 (out_mode == SFmode && out_n == C \
29806 && in_mode == SFmode && in_n == C)
29808 #undef ARM_FIND_VRINT_VARIANT
29809 #define ARM_FIND_VRINT_VARIANT(N) \
29810 (ARM_CHECK_BUILTIN_MODE (2) \
29811 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29812 : (ARM_CHECK_BUILTIN_MODE (4) \
29813 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29814 : NULL_TREE))
29816 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29818 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29819 switch (fn)
29821 case BUILT_IN_FLOORF:
29822 return ARM_FIND_VRINT_VARIANT (vrintm);
29823 case BUILT_IN_CEILF:
29824 return ARM_FIND_VRINT_VARIANT (vrintp);
29825 case BUILT_IN_TRUNCF:
29826 return ARM_FIND_VRINT_VARIANT (vrintz);
29827 case BUILT_IN_ROUNDF:
29828 return ARM_FIND_VRINT_VARIANT (vrinta);
29829 default:
29830 return NULL_TREE;
29833 return NULL_TREE;
29835 #undef ARM_CHECK_BUILTIN_MODE
29836 #undef ARM_FIND_VRINT_VARIANT
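/* Illustrative example: with -funsafe-math-optimizations on an ARMv8 FP
   target, a floorf call vectorized over four floats is mapped to the
   ARM_BUILTIN_NEON_vrintmv4sf builtin above (roughly, a vrintm.f32
   instruction on a quad register).  */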
29838 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29839 static HOST_WIDE_INT
29840 arm_vector_alignment (const_tree type)
29842 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29844 if (TARGET_AAPCS_BASED)
29845 align = MIN (align, 64);
29847 return align;
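/* Illustrative example: a 128-bit NEON vector type (TYPE_SIZE of 128 bits) is
   therefore given only 64-bit alignment when TARGET_AAPCS_BASED; without the
   AAPCS cap it keeps its full 128-bit natural alignment.  */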
29850 static unsigned int
29851 arm_autovectorize_vector_sizes (void)
29853 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29856 static bool
29857 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29859 /* Vectors which aren't in packed structures will not be less aligned than
29860 the natural alignment of their element type, so this is safe. */
29861 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29862 return !is_packed;
29864 return default_builtin_vector_alignment_reachable (type, is_packed);
29867 static bool
29868 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29869 const_tree type, int misalignment,
29870 bool is_packed)
29872 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29874 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29876 if (is_packed)
29877 return align == 1;
29879 /* If the misalignment is unknown, we should be able to handle the access
29880 so long as it is not to a member of a packed data structure. */
29881 if (misalignment == -1)
29882 return true;
29884 /* Return true if the misalignment is a multiple of the natural alignment
29885 of the vector's element type. This is probably always going to be
29886 true in practice, since we've already established that this isn't a
29887 packed access. */
29888 return ((misalignment % align) == 0);
29891 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29892 is_packed);
29895 static void
29896 arm_conditional_register_usage (void)
29898 int regno;
29900 if (TARGET_THUMB1 && optimize_size)
29902 /* When optimizing for size on Thumb-1, it's better not
29903 to use the HI regs, because of the overhead of
29904 stacking them. */
29905 for (regno = FIRST_HI_REGNUM;
29906 regno <= LAST_HI_REGNUM; ++regno)
29907 fixed_regs[regno] = call_used_regs[regno] = 1;
29910 /* The link register can be clobbered by any branch insn,
29911 but we have no way to track that at present, so mark
29912 it as unavailable. */
29913 if (TARGET_THUMB1)
29914 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29916 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29918 /* VFPv3 registers are disabled when earlier VFP
29919 versions are selected due to the definition of
29920 LAST_VFP_REGNUM. */
29921 for (regno = FIRST_VFP_REGNUM;
29922 regno <= LAST_VFP_REGNUM; ++ regno)
29924 fixed_regs[regno] = 0;
29925 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29926 || regno >= FIRST_VFP_REGNUM + 32;
29930 if (TARGET_REALLY_IWMMXT)
29932 regno = FIRST_IWMMXT_GR_REGNUM;
29933 /* The 2002/10/09 revision of the XScale ABI has wCG0
29934 and wCG1 as call-preserved registers. The 2002/11/21
29935 revision changed this so that all wCG registers are
29936 scratch registers. */
29937 for (regno = FIRST_IWMMXT_GR_REGNUM;
29938 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29939 fixed_regs[regno] = 0;
29940 /* The XScale ABI has wR0 - wR9 as scratch registers,
29941 the rest as call-preserved registers. */
29942 for (regno = FIRST_IWMMXT_REGNUM;
29943 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29945 fixed_regs[regno] = 0;
29946 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29950 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29952 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29953 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29955 else if (TARGET_APCS_STACK)
29957 fixed_regs[10] = 1;
29958 call_used_regs[10] = 1;
29960 /* -mcaller-super-interworking reserves r11 for calls to
29961 _interwork_r11_call_via_rN(). Making the register global
29962 is an easy way of ensuring that it remains valid for all
29963 calls. */
29964 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29965 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29967 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29968 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29969 if (TARGET_CALLER_INTERWORKING)
29970 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29972 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29975 static reg_class_t
29976 arm_preferred_rename_class (reg_class_t rclass)
29978 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29979 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
29980 so that code size can be reduced. */
29981 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29982 return LO_REGS;
29983 else
29984 return NO_REGS;
29987 /* Compute the attribute "length" of insn "*push_multi".
29988 So this function MUST be kept in sync with that insn pattern. */
29990 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29992 int i, regno, hi_reg;
29993 int num_saves = XVECLEN (parallel_op, 0);
29995 /* ARM mode. */
29996 if (TARGET_ARM)
29997 return 4;
29998 /* Thumb1 mode. */
29999 if (TARGET_THUMB1)
30000 return 2;
30002 /* Thumb2 mode. */
30003 regno = REGNO (first_op);
30004 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30005 for (i = 1; i < num_saves && !hi_reg; i++)
30007 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30008 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30011 if (!hi_reg)
30012 return 2;
30013 return 4;
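/* Illustrative example: in Thumb-2, "push {r4, r5, lr}" involves only low
   registers plus LR and so gets the 2-byte encoding, whereas "push {r4, r8}"
   names a high register other than LR and needs the 4-byte encoding.  */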
30016 /* Compute the number of instructions emitted by output_move_double. */
30018 arm_count_output_move_double_insns (rtx *operands)
30020 int count;
30021 rtx ops[2];
30022 /* output_move_double may modify the operands array, so call it
30023 here on a copy of the array. */
30024 ops[0] = operands[0];
30025 ops[1] = operands[1];
30026 output_move_double (ops, false, &count);
30027 return count;
30031 vfp3_const_double_for_fract_bits (rtx operand)
30033 REAL_VALUE_TYPE r0;
30035 if (!CONST_DOUBLE_P (operand))
30036 return 0;
30038 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30039 if (exact_real_inverse (DFmode, &r0))
30041 if (exact_real_truncate (DFmode, &r0))
30043 HOST_WIDE_INT value = real_to_integer (&r0);
30044 value = value & 0xffffffff;
30045 if ((value != 0) && ( (value & (value - 1)) == 0))
30046 return int_log2 (value);
30049 return 0;
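/* Worked example (illustrative): the constant 0.25 has the exact inverse 4.0,
   which truncates exactly to the power of two 4, so the function returns 2
   (the number of fractional bits); a value such as 0.3 fails the exact
   inverse test and yields 0.  */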
30053 vfp3_const_double_for_bits (rtx operand)
30055 REAL_VALUE_TYPE r0;
30057 if (!CONST_DOUBLE_P (operand))
30058 return 0;
30060 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30061 if (exact_real_truncate (DFmode, &r0))
30063 HOST_WIDE_INT value = real_to_integer (&r0);
30064 value = value & 0xffffffff;
30065 if ((value != 0) && ( (value & (value - 1)) == 0))
30066 return int_log2 (value);
30069 return 0;
30072 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30074 static void
30075 arm_pre_atomic_barrier (enum memmodel model)
30077 if (need_atomic_barrier_p (model, true))
30078 emit_insn (gen_memory_barrier ());
30081 static void
30082 arm_post_atomic_barrier (enum memmodel model)
30084 if (need_atomic_barrier_p (model, false))
30085 emit_insn (gen_memory_barrier ());
30088 /* Emit the load-exclusive and store-exclusive instructions.
30089 Use acquire and release versions if necessary. */
30091 static void
30092 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30094 rtx (*gen) (rtx, rtx);
30096 if (acq)
30098 switch (mode)
30100 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30101 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30102 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30103 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30104 default:
30105 gcc_unreachable ();
30108 else
30110 switch (mode)
30112 case QImode: gen = gen_arm_load_exclusiveqi; break;
30113 case HImode: gen = gen_arm_load_exclusivehi; break;
30114 case SImode: gen = gen_arm_load_exclusivesi; break;
30115 case DImode: gen = gen_arm_load_exclusivedi; break;
30116 default:
30117 gcc_unreachable ();
30121 emit_insn (gen (rval, mem));
30124 static void
30125 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30126 rtx mem, bool rel)
30128 rtx (*gen) (rtx, rtx, rtx);
30130 if (rel)
30132 switch (mode)
30134 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30135 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30136 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30137 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30138 default:
30139 gcc_unreachable ();
30142 else
30144 switch (mode)
30146 case QImode: gen = gen_arm_store_exclusiveqi; break;
30147 case HImode: gen = gen_arm_store_exclusivehi; break;
30148 case SImode: gen = gen_arm_store_exclusivesi; break;
30149 case DImode: gen = gen_arm_store_exclusivedi; break;
30150 default:
30151 gcc_unreachable ();
30155 emit_insn (gen (bval, rval, mem));
30158 /* Mark the previous jump instruction as unlikely. */
30160 static void
30161 emit_unlikely_jump (rtx insn)
30163 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30165 insn = emit_jump_insn (insn);
30166 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30169 /* Expand a compare and swap pattern. */
30171 void
30172 arm_expand_compare_and_swap (rtx operands[])
30174 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30175 enum machine_mode mode;
30176 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30178 bval = operands[0];
30179 rval = operands[1];
30180 mem = operands[2];
30181 oldval = operands[3];
30182 newval = operands[4];
30183 is_weak = operands[5];
30184 mod_s = operands[6];
30185 mod_f = operands[7];
30186 mode = GET_MODE (mem);
30188 /* Normally the succ memory model must be stronger than fail, but in the
30189 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30190 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30192 if (TARGET_HAVE_LDACQ
30193 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30194 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30195 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30197 switch (mode)
30199 case QImode:
30200 case HImode:
30201 /* For narrow modes, we're going to perform the comparison in SImode,
30202 so do the zero-extension now. */
30203 rval = gen_reg_rtx (SImode);
30204 oldval = convert_modes (SImode, mode, oldval, true);
30205 /* FALLTHRU */
30207 case SImode:
30208 /* Force the value into a register if needed. We waited until after
30209 the zero-extension above to do this properly. */
30210 if (!arm_add_operand (oldval, SImode))
30211 oldval = force_reg (SImode, oldval);
30212 break;
30214 case DImode:
30215 if (!cmpdi_operand (oldval, mode))
30216 oldval = force_reg (mode, oldval);
30217 break;
30219 default:
30220 gcc_unreachable ();
30223 switch (mode)
30225 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30226 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30227 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30228 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30229 default:
30230 gcc_unreachable ();
30233 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30235 if (mode == QImode || mode == HImode)
30236 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30238 /* In all cases, we arrange for success to be signaled by Z set.
30239 This arrangement allows for the boolean result to be used directly
30240 in a subsequent branch, post optimization. */
30241 x = gen_rtx_REG (CCmode, CC_REGNUM);
30242 x = gen_rtx_EQ (SImode, x, const0_rtx);
30243 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30246 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30247 another memory store between the load-exclusive and store-exclusive can
30248 reset the monitor from Exclusive to Open state. This means we must wait
30249 until after reload to split the pattern, lest we get a register spill in
30250 the middle of the atomic sequence. */
30252 void
30253 arm_split_compare_and_swap (rtx operands[])
30255 rtx rval, mem, oldval, newval, scratch;
30256 enum machine_mode mode;
30257 enum memmodel mod_s, mod_f;
30258 bool is_weak;
30259 rtx label1, label2, x, cond;
30261 rval = operands[0];
30262 mem = operands[1];
30263 oldval = operands[2];
30264 newval = operands[3];
30265 is_weak = (operands[4] != const0_rtx);
30266 mod_s = (enum memmodel) INTVAL (operands[5]);
30267 mod_f = (enum memmodel) INTVAL (operands[6]);
30268 scratch = operands[7];
30269 mode = GET_MODE (mem);
30271 bool use_acquire = TARGET_HAVE_LDACQ
30272 && !(mod_s == MEMMODEL_RELAXED
30273 || mod_s == MEMMODEL_CONSUME
30274 || mod_s == MEMMODEL_RELEASE);
30276 bool use_release = TARGET_HAVE_LDACQ
30277 && !(mod_s == MEMMODEL_RELAXED
30278 || mod_s == MEMMODEL_CONSUME
30279 || mod_s == MEMMODEL_ACQUIRE);
30281 /* Checks whether a barrier is needed and emits one accordingly. */
30282 if (!(use_acquire || use_release))
30283 arm_pre_atomic_barrier (mod_s);
30285 label1 = NULL_RTX;
30286 if (!is_weak)
30288 label1 = gen_label_rtx ();
30289 emit_label (label1);
30291 label2 = gen_label_rtx ();
30293 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30295 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30296 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30297 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30298 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30299 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30301 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30303 /* Weak or strong, we want EQ to be true for success, so that we
30304 match the flags that we got from the compare above. */
30305 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30306 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30307 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30309 if (!is_weak)
30311 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30312 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30313 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30314 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30317 if (mod_f != MEMMODEL_RELAXED)
30318 emit_label (label2);
30320 /* Checks whether a barrier is needed and emits one accordingly. */
30321 if (!(use_acquire || use_release))
30322 arm_post_atomic_barrier (mod_s);
30324 if (mod_f == MEMMODEL_RELAXED)
30325 emit_label (label2);
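/* Rough shape of the split sequence for a strong SImode compare-and-swap
   (illustrative only; barriers and acquire/release variants depend on the
   memory model):

     1:  ldrex   rval, [mem]
         cmp     rval, oldval
         bne     2f
         strex   scratch, newval, [mem]
         cmp     scratch, #0
         bne     1b
     2:                              @ Z is set exactly on success
*/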
30328 void
30329 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30330 rtx value, rtx model_rtx, rtx cond)
30332 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30333 enum machine_mode mode = GET_MODE (mem);
30334 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30335 rtx label, x;
30337 bool use_acquire = TARGET_HAVE_LDACQ
30338 && !(model == MEMMODEL_RELAXED
30339 || model == MEMMODEL_CONSUME
30340 || model == MEMMODEL_RELEASE);
30342 bool use_release = TARGET_HAVE_LDACQ
30343 && !(model == MEMMODEL_RELAXED
30344 || model == MEMMODEL_CONSUME
30345 || model == MEMMODEL_ACQUIRE);
30347 /* Checks whether a barrier is needed and emits one accordingly. */
30348 if (!(use_acquire || use_release))
30349 arm_pre_atomic_barrier (model);
30351 label = gen_label_rtx ();
30352 emit_label (label);
30354 if (new_out)
30355 new_out = gen_lowpart (wmode, new_out);
30356 if (old_out)
30357 old_out = gen_lowpart (wmode, old_out);
30358 else
30359 old_out = new_out;
30360 value = simplify_gen_subreg (wmode, value, mode, 0);
30362 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30364 switch (code)
30366 case SET:
30367 new_out = value;
30368 break;
30370 case NOT:
30371 x = gen_rtx_AND (wmode, old_out, value);
30372 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30373 x = gen_rtx_NOT (wmode, new_out);
30374 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30375 break;
30377 case MINUS:
30378 if (CONST_INT_P (value))
30380 value = GEN_INT (-INTVAL (value));
30381 code = PLUS;
30383 /* FALLTHRU */
30385 case PLUS:
30386 if (mode == DImode)
30388 /* DImode plus/minus need to clobber flags. */
30389 /* The adddi3 and subdi3 patterns are incorrectly written so that
30390 they require matching operands, even when we could easily support
30391 three operands. Thankfully, this can be fixed up post-splitting,
30392 as the individual add+adc patterns do accept three operands and
30393 post-reload cprop can make these moves go away. */
30394 emit_move_insn (new_out, old_out);
30395 if (code == PLUS)
30396 x = gen_adddi3 (new_out, new_out, value);
30397 else
30398 x = gen_subdi3 (new_out, new_out, value);
30399 emit_insn (x);
30400 break;
30402 /* FALLTHRU */
30404 default:
30405 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30406 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30407 break;
30410 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30411 use_release);
30413 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30414 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30416 /* Checks whether a barrier is needed and emits one accordingly. */
30417 if (!(use_acquire || use_release))
30418 arm_post_atomic_barrier (model);
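/* Rough shape of the loop emitted for, e.g., an atomic SImode add
   (illustrative only; barriers and acquire/release variants depend on the
   memory model):

     1:  ldrex   old, [mem]
         add     new, old, value
         strex   tmp, new, [mem]
         cmp     tmp, #0
         bne     1b
*/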
30421 #define MAX_VECT_LEN 16
30423 struct expand_vec_perm_d
30425 rtx target, op0, op1;
30426 unsigned char perm[MAX_VECT_LEN];
30427 enum machine_mode vmode;
30428 unsigned char nelt;
30429 bool one_vector_p;
30430 bool testing_p;
30433 /* Generate a variable permutation. */
30435 static void
30436 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30438 enum machine_mode vmode = GET_MODE (target);
30439 bool one_vector_p = rtx_equal_p (op0, op1);
30441 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30442 gcc_checking_assert (GET_MODE (op0) == vmode);
30443 gcc_checking_assert (GET_MODE (op1) == vmode);
30444 gcc_checking_assert (GET_MODE (sel) == vmode);
30445 gcc_checking_assert (TARGET_NEON);
30447 if (one_vector_p)
30449 if (vmode == V8QImode)
30450 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30451 else
30452 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30454 else
30456 rtx pair;
30458 if (vmode == V8QImode)
30460 pair = gen_reg_rtx (V16QImode);
30461 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30462 pair = gen_lowpart (TImode, pair);
30463 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30465 else
30467 pair = gen_reg_rtx (OImode);
30468 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30469 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30474 void
30475 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30477 enum machine_mode vmode = GET_MODE (target);
30478 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30479 bool one_vector_p = rtx_equal_p (op0, op1);
30480 rtx rmask[MAX_VECT_LEN], mask;
30482 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30483 numbering of elements for big-endian, we must reverse the order. */
30484 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30486 /* The VTBL instruction does not use a modulo index, so we must take care
30487 of that ourselves. */
30488 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30489 for (i = 0; i < nelt; ++i)
30490 rmask[i] = mask;
30491 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30492 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30494 arm_expand_vec_perm_1 (target, op0, op1, sel);
30497 /* Generate or test for an insn that supports a constant permutation. */
30499 /* Recognize patterns for the VUZP insns. */
30501 static bool
30502 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30504 unsigned int i, odd, mask, nelt = d->nelt;
30505 rtx out0, out1, in0, in1, x;
30506 rtx (*gen)(rtx, rtx, rtx, rtx);
30508 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30509 return false;
30511 /* Note that these are little-endian tests. Adjust for big-endian later. */
30512 if (d->perm[0] == 0)
30513 odd = 0;
30514 else if (d->perm[0] == 1)
30515 odd = 1;
30516 else
30517 return false;
30518 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30520 for (i = 0; i < nelt; i++)
30522 unsigned elt = (i * 2 + odd) & mask;
30523 if (d->perm[i] != elt)
30524 return false;
30527 /* Success! */
30528 if (d->testing_p)
30529 return true;
30531 switch (d->vmode)
30533 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30534 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30535 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30536 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30537 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30538 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30539 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30540 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30541 default:
30542 gcc_unreachable ();
30545 in0 = d->op0;
30546 in1 = d->op1;
30547 if (BYTES_BIG_ENDIAN)
30549 x = in0, in0 = in1, in1 = x;
30550 odd = !odd;
30553 out0 = d->target;
30554 out1 = gen_reg_rtx (d->vmode);
30555 if (odd)
30556 x = out0, out0 = out1, out1 = x;
30558 emit_insn (gen (out0, in0, in1, out1));
30559 return true;
30562 /* Recognize patterns for the VZIP insns. */
30564 static bool
30565 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30567 unsigned int i, high, mask, nelt = d->nelt;
30568 rtx out0, out1, in0, in1, x;
30569 rtx (*gen)(rtx, rtx, rtx, rtx);
30571 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30572 return false;
30574 /* Note that these are little-endian tests. Adjust for big-endian later. */
30575 high = nelt / 2;
30576 if (d->perm[0] == high)
30578 else if (d->perm[0] == 0)
30579 high = 0;
30580 else
30581 return false;
30582 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30584 for (i = 0; i < nelt / 2; i++)
30586 unsigned elt = (i + high) & mask;
30587 if (d->perm[i * 2] != elt)
30588 return false;
30589 elt = (elt + nelt) & mask;
30590 if (d->perm[i * 2 + 1] != elt)
30591 return false;
30594 /* Success! */
30595 if (d->testing_p)
30596 return true;
30598 switch (d->vmode)
30600 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30601 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30602 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30603 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30604 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30605 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30606 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30607 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30608 default:
30609 gcc_unreachable ();
30612 in0 = d->op0;
30613 in1 = d->op1;
30614 if (BYTES_BIG_ENDIAN)
30616 x = in0, in0 = in1, in1 = x;
30617 high = !high;
30620 out0 = d->target;
30621 out1 = gen_reg_rtx (d->vmode);
30622 if (high)
30623 x = out0, out0 = out1, out1 = x;
30625 emit_insn (gen (out0, in0, in1, out1));
30626 return true;
30629 /* Recognize patterns for the VREV insns. */
30631 static bool
30632 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30634 unsigned int i, j, diff, nelt = d->nelt;
30635 rtx (*gen)(rtx, rtx, rtx);
30637 if (!d->one_vector_p)
30638 return false;
30640 diff = d->perm[0];
30641 switch (diff)
30643 case 7:
30644 switch (d->vmode)
30646 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30647 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30648 default:
30649 return false;
30651 break;
30652 case 3:
30653 switch (d->vmode)
30655 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30656 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30657 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30658 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30659 default:
30660 return false;
30662 break;
30663 case 1:
30664 switch (d->vmode)
30666 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30667 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30668 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30669 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30670 case V4SImode: gen = gen_neon_vrev64v4si; break;
30671 case V2SImode: gen = gen_neon_vrev64v2si; break;
30672 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30673 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30674 default:
30675 return false;
30677 break;
30678 default:
30679 return false;
30682 for (i = 0; i < nelt ; i += diff + 1)
30683 for (j = 0; j <= diff; j += 1)
30685 /* This is guaranteed to be true as the value of diff
30686 is 7, 3 or 1 and we should have enough elements in the
30687 queue to generate this. Getting a vector mask with a
30688 value of diff other than these values implies that
30689 something is wrong by the time we get here. */
30690 gcc_assert (i + j < nelt);
30691 if (d->perm[i + j] != i + diff - j)
30692 return false;
30695 /* Success! */
30696 if (d->testing_p)
30697 return true;
30699 /* ??? The third operand is an artifact of the builtin infrastructure
30700 and is ignored by the actual instruction. */
30701 emit_insn (gen (d->target, d->op0, const0_rtx));
30702 return true;
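/* Illustrative example: the single-operand V8QImode permutation
   {7,6,5,4,3,2,1,0} gives diff == 7, satisfies the element checks above, and
   is therefore emitted as a single vrev64.8 on the D register.  */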
30705 /* Recognize patterns for the VTRN insns. */
30707 static bool
30708 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30710 unsigned int i, odd, mask, nelt = d->nelt;
30711 rtx out0, out1, in0, in1, x;
30712 rtx (*gen)(rtx, rtx, rtx, rtx);
30714 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30715 return false;
30717 /* Note that these are little-endian tests. Adjust for big-endian later. */
30718 if (d->perm[0] == 0)
30719 odd = 0;
30720 else if (d->perm[0] == 1)
30721 odd = 1;
30722 else
30723 return false;
30724 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30726 for (i = 0; i < nelt; i += 2)
30728 if (d->perm[i] != i + odd)
30729 return false;
30730 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30731 return false;
30734 /* Success! */
30735 if (d->testing_p)
30736 return true;
30738 switch (d->vmode)
30740 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30741 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30742 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30743 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30744 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30745 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30746 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30747 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30748 default:
30749 gcc_unreachable ();
30752 in0 = d->op0;
30753 in1 = d->op1;
30754 if (BYTES_BIG_ENDIAN)
30756 x = in0, in0 = in1, in1 = x;
30757 odd = !odd;
30760 out0 = d->target;
30761 out1 = gen_reg_rtx (d->vmode);
30762 if (odd)
30763 x = out0, out0 = out1, out1 = x;
30765 emit_insn (gen (out0, in0, in1, out1));
30766 return true;
30769 /* Recognize patterns for the VEXT insns. */
30771 static bool
30772 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30774 unsigned int i, nelt = d->nelt;
30775 rtx (*gen) (rtx, rtx, rtx, rtx);
30776 rtx offset;
30778 unsigned int location;
30780 unsigned int next = d->perm[0] + 1;
30782 /* TODO: Handle GCC's numbering of elements for big-endian. */
30783 if (BYTES_BIG_ENDIAN)
30784 return false;
30786 /* Check if the extracted indexes are increasing by one. */
30787 for (i = 1; i < nelt; next++, i++)
30789 /* If we hit the most significant element of the 2nd vector in
30790 the previous iteration, no need to test further. */
30791 if (next == 2 * nelt)
30792 return false;
30794 /* If we are operating on only one vector: it could be a
30795 rotation. If there are only two elements of size < 64, let
30796 arm_evpc_neon_vrev catch it. */
30797 if (d->one_vector_p && (next == nelt))
30799 if ((nelt == 2) && (d->vmode != V2DImode))
30800 return false;
30801 else
30802 next = 0;
30805 if (d->perm[i] != next)
30806 return false;
30809 location = d->perm[0];
30811 switch (d->vmode)
30813 case V16QImode: gen = gen_neon_vextv16qi; break;
30814 case V8QImode: gen = gen_neon_vextv8qi; break;
30815 case V4HImode: gen = gen_neon_vextv4hi; break;
30816 case V8HImode: gen = gen_neon_vextv8hi; break;
30817 case V2SImode: gen = gen_neon_vextv2si; break;
30818 case V4SImode: gen = gen_neon_vextv4si; break;
30819 case V2SFmode: gen = gen_neon_vextv2sf; break;
30820 case V4SFmode: gen = gen_neon_vextv4sf; break;
30821 case V2DImode: gen = gen_neon_vextv2di; break;
30822 default:
30823 return false;
30826 /* Success! */
30827 if (d->testing_p)
30828 return true;
30830 offset = GEN_INT (location);
30831 emit_insn (gen (d->target, d->op0, d->op1, offset));
30832 return true;
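/* Illustrative sketch (not part of GCC): the loop above accepts
   selectors whose indices increase by one, i.e. a window of nelt
   consecutive elements taken from the concatenation op0:op1 starting
   at `location'.  For V4SImode a selector of {1,2,3,4} becomes a VEXT
   with offset 1, producing {a1,a2,a3,b0}.  The names below are local
   to this example.  */
#if 0
/* Scalar model of vext.32 with a 4-element vector width.  */
static void
vext4 (const int *a, const int *b, int *out, unsigned loc)
{
  for (unsigned i = 0; i < 4; i++)
    {
      unsigned idx = i + loc;          /* index into the a:b concatenation */
      out[i] = (idx < 4) ? a[idx] : b[idx - 4];
    }
}
#endif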
30835 /* The NEON VTBL instruction is a fully variable permutation that's even
30836 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30837 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30838 can do slightly better by expanding this as a constant where we don't
30839 have to apply a mask. */
30841 static bool
30842 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30844 rtx rperm[MAX_VECT_LEN], sel;
30845 enum machine_mode vmode = d->vmode;
30846 unsigned int i, nelt = d->nelt;
30848 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30849 numbering of elements for big-endian, we must reverse the order. */
30850 if (BYTES_BIG_ENDIAN)
30851 return false;
30853 if (d->testing_p)
30854 return true;
30856 /* Generic code will try constant permutation twice. Once with the
30857 original mode and again with the elements lowered to QImode.
30858 So wait and don't do the selector expansion ourselves. */
30859 if (vmode != V8QImode && vmode != V16QImode)
30860 return false;
30862 for (i = 0; i < nelt; ++i)
30863 rperm[i] = GEN_INT (d->perm[i]);
30864 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30865 sel = force_reg (vmode, sel);
30867 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30868 return true;
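/* Illustrative sketch (not part of GCC): VEC_PERM_EXPR masks the
   selector with (2 * nelt - 1), while VTBL returns zero for any
   out-of-range index.  For a compile-time-constant selector the
   indices are already known to be in range, so the selector can be
   emitted directly without the masking step.  A scalar comparison of
   the two behaviours; all names are local to this example.  */
#if 0
#include <stdint.h>

/* VEC_PERM_EXPR semantics: the selector is masked.  */
static uint8_t
perm_expr_lane (const uint8_t *ab, unsigned nelt2, uint8_t sel)
{
  return ab[sel & (nelt2 - 1)];
}

/* VTBL semantics: out-of-range selector lanes read as zero.  */
static uint8_t
vtbl_lane (const uint8_t *ab, unsigned nelt2, uint8_t sel)
{
  return sel < nelt2 ? ab[sel] : 0;
}
#endif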
30871 static bool
30872 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30874 /* Check if the input mask matches vext before reordering the
30875 operands. */
30876 if (TARGET_NEON)
30877 if (arm_evpc_neon_vext (d))
30878 return true;
30880 /* The pattern matching functions above are written to look for a small
30881 number to begin the sequence (0, 1, N/2). If we begin with an index
30882 from the second operand, we can swap the operands. */
30883 if (d->perm[0] >= d->nelt)
30885 unsigned i, nelt = d->nelt;
30886 rtx x;
30888 for (i = 0; i < nelt; ++i)
30889 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30891 x = d->op0;
30892 d->op0 = d->op1;
30893 d->op1 = x;
30896 if (TARGET_NEON)
30898 if (arm_evpc_neon_vuzp (d))
30899 return true;
30900 if (arm_evpc_neon_vzip (d))
30901 return true;
30902 if (arm_evpc_neon_vrev (d))
30903 return true;
30904 if (arm_evpc_neon_vtrn (d))
30905 return true;
30906 return arm_evpc_neon_vtbl (d);
30908 return false;
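/* Illustrative sketch (not part of GCC): if the selector begins with an
   index from the second operand, adding nelt modulo 2 * nelt flips every
   index between the two halves; applying the flipped selector to the
   swapped operands selects the same elements, but the mask now starts
   below nelt as the matchers above expect.  For example, with nelt == 4
   the selector {5,1,6,2} on (op0,op1) becomes {1,5,2,6} on (op1,op0).
   The name below is local to this example.  */
#if 0
/* Flip every selector index between the two input halves.  Applying
   the result to swapped operands selects the same elements.  */
static void
swap_perm_operands (unsigned char *perm, unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
  /* ... and the caller exchanges op0/op1 to match.  */
}
#endif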
30911 /* Expand a vec_perm_const pattern. */
30913 bool
30914 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30916 struct expand_vec_perm_d d;
30917 int i, nelt, which;
30919 d.target = target;
30920 d.op0 = op0;
30921 d.op1 = op1;
30923 d.vmode = GET_MODE (target);
30924 gcc_assert (VECTOR_MODE_P (d.vmode));
30925 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30926 d.testing_p = false;
30928 for (i = which = 0; i < nelt; ++i)
30930 rtx e = XVECEXP (sel, 0, i);
30931 int ei = INTVAL (e) & (2 * nelt - 1);
30932 which |= (ei < nelt ? 1 : 2);
30933 d.perm[i] = ei;
30936 switch (which)
30938 default:
30939 gcc_unreachable();
30941 case 3:
30942 d.one_vector_p = false;
30943 if (!rtx_equal_p (op0, op1))
30944 break;
30946 /* The elements of PERM do not suggest that only the first operand
30947 is used, but both operands are identical. Allow easier matching
30948 of the permutation by folding the permutation into the single
30949 input vector. */
30950 /* FALLTHRU */
30951 case 2:
30952 for (i = 0; i < nelt; ++i)
30953 d.perm[i] &= nelt - 1;
30954 d.op0 = op1;
30955 d.one_vector_p = true;
30956 break;
30958 case 1:
30959 d.op1 = op0;
30960 d.one_vector_p = true;
30961 break;
30964 return arm_expand_vec_perm_const_1 (&d);
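/* Illustrative sketch (not part of GCC): the `which' bitmask above
   records whether the selector references the first operand (bit 0),
   the second (bit 1), or both.  With nelt == 4, {0,1,2,3} gives
   which == 1, {4,5,6,7} gives which == 2 (and is folded down to
   {0,1,2,3} applied to op1 alone), and {0,5,2,7} gives which == 3.
   The name below is local to this example.  */
#if 0
/* Classify a selector exactly as the loop above does: bit 0 set if any
   index selects from the first vector, bit 1 if any selects from the
   second.  */
static unsigned
classify_selector (const unsigned char *sel, unsigned nelt)
{
  unsigned which = 0;
  for (unsigned i = 0; i < nelt; i++)
    {
      unsigned ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt) ? 1 : 2;
    }
  return which;   /* 1, 2 or 3 */
}
#endif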
30967 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30969 static bool
30970 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30971 const unsigned char *sel)
30973 struct expand_vec_perm_d d;
30974 unsigned int i, nelt, which;
30975 bool ret;
30977 d.vmode = vmode;
30978 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30979 d.testing_p = true;
30980 memcpy (d.perm, sel, nelt);
30982 /* Categorize the set of elements in the selector. */
30983 for (i = which = 0; i < nelt; ++i)
30985 unsigned char e = d.perm[i];
30986 gcc_assert (e < 2 * nelt);
30987 which |= (e < nelt ? 1 : 2);
30990 /* For all elements from the second vector, fold them into the first. */
30991 if (which == 2)
30992 for (i = 0; i < nelt; ++i)
30993 d.perm[i] -= nelt;
30995 /* Check whether the mask can be applied to the vector type. */
30996 d.one_vector_p = (which != 3);
30998 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30999 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31000 if (!d.one_vector_p)
31001 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31003 start_sequence ();
31004 ret = arm_expand_vec_perm_const_1 (&d);
31005 end_sequence ();
31007 return ret;
31010 bool
31011 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31013 /* If we are soft float and either have ldrd or the mode fits
31014 in a single word, then all auto increment forms are ok. */
31015 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31016 return true;
31018 switch (code)
31020 /* Post-increment and pre-decrement are supported for all instruction
31021 forms, except that pre-decrement is not available for vector forms. */
31022 case ARM_POST_INC:
31023 case ARM_PRE_DEC:
31024 if (VECTOR_MODE_P (mode))
31026 if (code != ARM_PRE_DEC)
31027 return true;
31028 else
31029 return false;
31032 return true;
31034 case ARM_POST_DEC:
31035 case ARM_PRE_INC:
31036 /* Without LDRD, and with a mode size greater than the
31037 word size, there is no point in auto-incrementing
31038 because ldm and stm will not have these forms. */
31039 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31040 return false;
31042 /* Vector and floating point modes do not support
31043 these auto increment forms. */
31044 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31045 return false;
31047 return true;
31049 default:
31050 return false;
31054 return false;
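/* Illustrative sketch (not part of GCC): a condensed restatement of the
   rules above, ignoring the initial soft-float shortcut.  Post-increment
   is always accepted; pre-decrement is accepted for everything except
   vector modes; pre-increment and post-decrement need LDRD (or a
   single-word access) and are never accepted for vector or floating-point
   modes.  All names below are local to this example.  */
#if 0
#include <stdbool.h>

enum ex_autoinc_kind { EX_POST_INC, EX_PRE_DEC, EX_POST_DEC, EX_PRE_INC };

static bool
ex_autoinc_ok (enum ex_autoinc_kind kind, bool is_vector, bool is_float,
               bool have_ldrd, unsigned mode_size)
{
  switch (kind)
    {
    case EX_POST_INC:
      return true;                      /* always available */
    case EX_PRE_DEC:
      return !is_vector;                /* not for vector modes */
    case EX_POST_DEC:
    case EX_PRE_INC:
      if (!have_ldrd && mode_size > 4)  /* no ldm/stm forms for these */
        return false;
      return !(is_float || is_vector);
    }
  return false;
}
#endif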
31057 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31058 on ARM, since we know that shifts by negative amounts are no-ops.
31059 Additionally, the default expansion code is not available or suitable
31060 for post-reload insn splits (this can occur when the register allocator
31061 chooses not to do a shift in NEON).
31063 This function is used in both initial expand and post-reload splits, and
31064 handles all kinds of 64-bit shifts.
31066 Input requirements:
31067 - It is safe for the input and output to be the same register, but
31068 early-clobber rules apply for the shift amount and scratch registers.
31069 - Shift by register requires both scratch registers. In all other cases
31070 the scratch registers may be NULL.
31071 - Ashiftrt by a register also clobbers the CC register. */
31072 void
31073 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31074 rtx amount, rtx scratch1, rtx scratch2)
31076 rtx out_high = gen_highpart (SImode, out);
31077 rtx out_low = gen_lowpart (SImode, out);
31078 rtx in_high = gen_highpart (SImode, in);
31079 rtx in_low = gen_lowpart (SImode, in);
31081 /* Terminology:
31082 in = the register pair containing the input value.
31083 out = the destination register pair.
31084 up = the high- or low-part of each pair.
31085 down = the opposite part to "up".
31086 In a shift, we can consider bits to shift from "up"-stream to
31087 "down"-stream, so in a left-shift "up" is the low-part and "down"
31088 is the high-part of each register pair. */
31090 rtx out_up = code == ASHIFT ? out_low : out_high;
31091 rtx out_down = code == ASHIFT ? out_high : out_low;
31092 rtx in_up = code == ASHIFT ? in_low : in_high;
31093 rtx in_down = code == ASHIFT ? in_high : in_low;
31095 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31096 gcc_assert (out
31097 && (REG_P (out) || GET_CODE (out) == SUBREG)
31098 && GET_MODE (out) == DImode);
31099 gcc_assert (in
31100 && (REG_P (in) || GET_CODE (in) == SUBREG)
31101 && GET_MODE (in) == DImode);
31102 gcc_assert (amount
31103 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31104 && GET_MODE (amount) == SImode)
31105 || CONST_INT_P (amount)));
31106 gcc_assert (scratch1 == NULL
31107 || (GET_CODE (scratch1) == SCRATCH)
31108 || (GET_MODE (scratch1) == SImode
31109 && REG_P (scratch1)));
31110 gcc_assert (scratch2 == NULL
31111 || (GET_CODE (scratch2) == SCRATCH)
31112 || (GET_MODE (scratch2) == SImode
31113 && REG_P (scratch2)));
31114 gcc_assert (!REG_P (out) || !REG_P (amount)
31115 || !HARD_REGISTER_P (out)
31116 || (REGNO (out) != REGNO (amount)
31117 && REGNO (out) + 1 != REGNO (amount)));
31119 /* Macros to make following code more readable. */
31120 #define SUB_32(DEST,SRC) \
31121 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31122 #define RSB_32(DEST,SRC) \
31123 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31124 #define SUB_S_32(DEST,SRC) \
31125 gen_addsi3_compare0 ((DEST), (SRC), \
31126 GEN_INT (-32))
31127 #define SET(DEST,SRC) \
31128 gen_rtx_SET (SImode, (DEST), (SRC))
31129 #define SHIFT(CODE,SRC,AMOUNT) \
31130 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31131 #define LSHIFT(CODE,SRC,AMOUNT) \
31132 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31133 SImode, (SRC), (AMOUNT))
31134 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31135 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31136 SImode, (SRC), (AMOUNT))
31137 #define ORR(A,B) \
31138 gen_rtx_IOR (SImode, (A), (B))
31139 #define BRANCH(COND,LABEL) \
31140 gen_arm_cond_branch ((LABEL), \
31141 gen_rtx_ ## COND (CCmode, cc_reg, \
31142 const0_rtx), \
31143 cc_reg)
31145 /* Shifts by register and shifts by constant are handled separately. */
31146 if (CONST_INT_P (amount))
31148 /* We have a shift-by-constant. */
31150 /* First, handle out-of-range shift amounts.
31151 In both cases we try to match the result an ARM instruction in a
31152 shift-by-register would give. This helps reduce execution
31153 differences between optimization levels, but it won't stop other
31154 parts of the compiler doing different things. This is "undefined
31155 behaviour", in any case. */
31156 if (INTVAL (amount) <= 0)
31157 emit_insn (gen_movdi (out, in));
31158 else if (INTVAL (amount) >= 64)
31160 if (code == ASHIFTRT)
31162 rtx const31_rtx = GEN_INT (31);
31163 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31164 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31166 else
31167 emit_insn (gen_movdi (out, const0_rtx));
31170 /* Now handle valid shifts. */
31171 else if (INTVAL (amount) < 32)
31173 /* Shifts by a constant less than 32. */
31174 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31176 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31177 emit_insn (SET (out_down,
31178 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31179 out_down)));
31180 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31182 else
31184 /* Shifts by a constant greater than 31. */
31185 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31187 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31188 if (code == ASHIFTRT)
31189 emit_insn (gen_ashrsi3 (out_up, in_up,
31190 GEN_INT (31)));
31191 else
31192 emit_insn (SET (out_up, const0_rtx));
31195 else
31197 /* We have a shift-by-register. */
31198 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31200 /* This alternative requires the scratch registers. */
31201 gcc_assert (scratch1 && REG_P (scratch1));
31202 gcc_assert (scratch2 && REG_P (scratch2));
31204 /* We will need the values "amount-32" and "32-amount" later.
31205 Swapping them around now allows the later code to be more general. */
31206 switch (code)
31208 case ASHIFT:
31209 emit_insn (SUB_32 (scratch1, amount));
31210 emit_insn (RSB_32 (scratch2, amount));
31211 break;
31212 case ASHIFTRT:
31213 emit_insn (RSB_32 (scratch1, amount));
31214 /* Also set CC = amount > 32. */
31215 emit_insn (SUB_S_32 (scratch2, amount));
31216 break;
31217 case LSHIFTRT:
31218 emit_insn (RSB_32 (scratch1, amount));
31219 emit_insn (SUB_32 (scratch2, amount));
31220 break;
31221 default:
31222 gcc_unreachable ();
31225 /* Emit code like this:
31227 arithmetic-left:
31228 out_down = in_down << amount;
31229 out_down = (in_up << (amount - 32)) | out_down;
31230 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31231 out_up = in_up << amount;
31233 arithmetic-right:
31234 out_down = in_down >> amount;
31235 out_down = (in_up << (32 - amount)) | out_down;
31236 if (amount >= 32)
31237 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31238 out_up = in_up >> amount;
31240 logical-right:
31241 out_down = in_down >> amount;
31242 out_down = (in_up << (32 - amount)) | out_down;
31243 if (amount >= 32)
31244 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31245 out_up = in_up >> amount;
31247 The ARM and Thumb2 variants are the same but implemented slightly
31248 differently. If this were only called during expand we could just
31249 use the Thumb2 case and let combine do the right thing, but this
31250 can also be called from post-reload splitters. */
31252 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31254 if (!TARGET_THUMB2)
31256 /* Emit code for ARM mode. */
31257 emit_insn (SET (out_down,
31258 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31259 if (code == ASHIFTRT)
31261 rtx done_label = gen_label_rtx ();
31262 emit_jump_insn (BRANCH (LT, done_label));
31263 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31264 out_down)));
31265 emit_label (done_label);
31267 else
31268 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31269 out_down)));
31271 else
31273 /* Emit code for Thumb2 mode.
31274 Thumb2 can't do shift and or in one insn. */
31275 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31276 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31278 if (code == ASHIFTRT)
31280 rtx done_label = gen_label_rtx ();
31281 emit_jump_insn (BRANCH (LT, done_label));
31282 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31283 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31284 emit_label (done_label);
31286 else
31288 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31289 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31293 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31296 #undef SUB_32
31297 #undef RSB_32
31298 #undef SUB_S_32
31299 #undef SET
31300 #undef SHIFT
31301 #undef LSHIFT
31302 #undef REV_LSHIFT
31303 #undef ORR
31304 #undef BRANCH
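/* Illustrative sketch (not part of GCC): a scalar model of the
   shift-by-register sequence above for a 64-bit logical right shift,
   built from the same three ORR steps on 32-bit halves.  The helper
   lsr32/lsl32 model the ARM register-shift rule that amounts of 32 or
   more produce zero, which is what makes the third step harmless for
   amounts below 32.  All names are local to this example.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* ARM register-specified LSR: amounts of 32..255 yield zero.  */
static uint32_t
lsr32 (uint32_t x, uint32_t amount)
{
  return (amount & 0xff) < 32 ? x >> (amount & 0xff) : 0;
}

/* ARM register-specified LSL: amounts of 32..255 yield zero.  */
static uint32_t
lsl32 (uint32_t x, uint32_t amount)
{
  return (amount & 0xff) < 32 ? x << (amount & 0xff) : 0;
}

/* 64-bit logical right shift from 32-bit halves, mirroring the emitted
   sequence: scratch1 = 32 - amount, scratch2 = amount - 32.  */
static uint64_t
lshr64 (uint32_t in_up, uint32_t in_down, uint32_t amount)
{
  uint32_t out_down = lsr32 (in_down, amount);
  out_down |= lsl32 (in_up, 32 - amount);       /* zero when amount >= 32 */
  out_down |= lsr32 (in_up, amount - 32);       /* zero when amount < 32 */
  uint32_t out_up = lsr32 (in_up, amount);
  return ((uint64_t) out_up << 32) | out_down;
}

static void
example (void)
{
  assert (lshr64 (0x12345678, 0x9abcdef0, 4)  == 0x0123456789abcdefULL);
  assert (lshr64 (0x12345678, 0x9abcdef0, 36) == 0x0000000001234567ULL);
}
#endif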
31308 /* Returns true if this is a valid comparison operation and puts
31309 the operands into a form that is valid. */
31310 bool
31311 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31313 enum rtx_code code = GET_CODE (*comparison);
31314 int code_int;
31315 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31316 ? GET_MODE (*op2) : GET_MODE (*op1);
31318 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31320 if (code == UNEQ || code == LTGT)
31321 return false;
31323 code_int = (int)code;
31324 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31325 PUT_CODE (*comparison, (enum rtx_code)code_int);
31327 switch (mode)
31329 case SImode:
31330 if (!arm_add_operand (*op1, mode))
31331 *op1 = force_reg (mode, *op1);
31332 if (!arm_add_operand (*op2, mode))
31333 *op2 = force_reg (mode, *op2);
31334 return true;
31336 case DImode:
31337 if (!cmpdi_operand (*op1, mode))
31338 *op1 = force_reg (mode, *op1);
31339 if (!cmpdi_operand (*op2, mode))
31340 *op2 = force_reg (mode, *op2);
31341 return true;
31343 case SFmode:
31344 case DFmode:
31345 if (!arm_float_compare_operand (*op1, mode))
31346 *op1 = force_reg (mode, *op1);
31347 if (!arm_float_compare_operand (*op2, mode))
31348 *op2 = force_reg (mode, *op2);
31349 return true;
31350 default:
31351 break;
31354 return false;
31358 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31360 static unsigned HOST_WIDE_INT
31361 arm_asan_shadow_offset (void)
31363 return (unsigned HOST_WIDE_INT) 1 << 29;
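/* Illustrative sketch (not part of GCC): with AddressSanitizer's standard
   1/8 shadow mapping, the shadow byte for an address lives at
   (addr >> 3) + offset, so returning 1 << 29 places the ARM shadow region
   at 0x20000000.  A worked computation; names are local to this example.  */
#if 0
#include <stdint.h>
#include <assert.h>

/* Standard AddressSanitizer mapping: one shadow byte per 8 bytes of
   application memory, displaced by the target's shadow offset.  */
static uintptr_t
asan_shadow_addr (uintptr_t addr)
{
  return (addr >> 3) + ((uintptr_t) 1 << 29);
}

static void
example (void)
{
  /* An application address of 0x40000000 maps to shadow 0x28000000.  */
  assert (asan_shadow_addr (0x40000000u) == 0x28000000u);
}
#endif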
31367 /* This is a temporary fix for PR60655. Ideally we need
31368 to handle most of these cases in the generic part but
31369 currently we reject minus (..) (sym_ref). We try to
31370 ameliorate the case with minus (sym_ref1) (sym_ref2)
31371 where they are in the same section. */
31373 static bool
31374 arm_const_not_ok_for_debug_p (rtx p)
31376 tree decl_op0 = NULL;
31377 tree decl_op1 = NULL;
31379 if (GET_CODE (p) == MINUS)
31381 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31383 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31384 if (decl_op1
31385 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31386 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31388 if ((TREE_CODE (decl_op1) == VAR_DECL
31389 || TREE_CODE (decl_op1) == CONST_DECL)
31390 && (TREE_CODE (decl_op0) == VAR_DECL
31391 || TREE_CODE (decl_op0) == CONST_DECL))
31392 return (get_variable_section (decl_op1, false)
31393 != get_variable_section (decl_op0, false));
31395 if (TREE_CODE (decl_op1) == LABEL_DECL
31396 && TREE_CODE (decl_op0) == LABEL_DECL)
31397 return (DECL_CONTEXT (decl_op1)
31398 != DECL_CONTEXT (decl_op0));
31401 return true;
31405 return false;
31408 #include "gt-arm.h"