1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
67 void (*arm_lang_output_object_attributes_hook)(void);
 69 struct four_ints
 70 {
 71   int i[4];
 72 };
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
 292 static const struct attribute_spec arm_attribute_table[] =
 293 {
 294   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
 321      multiple times.
 322   */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
 333   { NULL, 0, 0, false, false, false, NULL, false }
 334 };
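/* For illustration, the table above is what makes declarations such as the
   following (hypothetical) ones valid in user code:

     void far_handler (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_helper (double) __attribute__ ((pcs ("aapcs-vfp")));

   "IRQ" and "aapcs-vfp" follow the documented argument spellings for the
   isr/interrupt and pcs attributes; the function names are made up.  */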
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
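/* Spelling out the arithmetic in the comment above: the anchor range runs
   from -4088 to +4095, i.e. 4088 + 1 + 4095 = 8184 bytes in total, and
   8184 = 1023 * 8, so the block size is a multiple of eight as claimed.  */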
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 struct gcc_target targetm = TARGET_INITIALIZER;
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack;
682 static char * minipool_startobj;
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped = 5;
688 extern FILE * asm_out_file;
690 /* True if we are currently building a constant table. */
691 int making_const_table;
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune = arm_none;
696 /* The current tuning set. */
697 const struct tune_params *current_tune;
699 /* Which floating point hardware to schedule for. */
700 int arm_fpu_attr;
 702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc *arm_fpu_desc;
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label[14];
707 static int thumb_call_reg_needed;
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 profile. */
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 architecture. */
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
740 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
741 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
 743 /* Flags that only affect tuning, not available instructions. */
744 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
745 | FL_CO_PROC)
747 #define FL_FOR_ARCH2 FL_NOTM
748 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
749 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
750 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
751 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
752 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
753 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
754 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
755 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
756 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
757 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
758 #define FL_FOR_ARCH6J FL_FOR_ARCH6
759 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
760 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
761 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
762 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
763 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
764 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
765 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
766 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
767 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
769 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
770 | FL_ARM_DIV | FL_NOTM)
772 /* The bits in this mask specify which
773 instructions we are allowed to generate. */
774 static unsigned long insn_flags = 0;
776 /* The bits in this mask specify which instruction scheduling options should
777 be used. */
778 static unsigned long tune_flags = 0;
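/* As an illustration of how these two masks are consumed: capability and
   tuning queries are simple bit tests, in the style of arm_option_override
   later in this file, e.g.

     arm_arch5e      = (insn_flags & FL_ARCH5E) != 0;
     arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;  */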
780 /* The highest ARM architecture version supported by the
781 target. */
782 enum base_architecture arm_base_arch = BASE_ARCH_0;
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
788 int arm_arch3m = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
791 int arm_arch4 = 0;
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
794 int arm_arch4t = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
797 int arm_arch5 = 0;
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
800 int arm_arch5e = 0;
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
803 int arm_arch6 = 0;
805 /* Nonzero if this chip supports the ARM 6K extensions. */
806 int arm_arch6k = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
 838 /* Nonzero if tuning for XScale. */
839 int arm_tune_xscale = 0;
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
848 /* Nonzero if generating Thumb instructions. */
849 int thumb_code = 0;
851 /* Nonzero if generating Thumb-1 instructions. */
852 int thumb1_code = 0;
854 /* Nonzero if we should define __THUMB_INTERWORK__ in the
855 preprocessor.
856 XXX This is a bit of a hack, it's intended to help work around
857 problems in GLD which doesn't understand that armv5t code is
858 interworking clean. */
859 int arm_cpp_interwork = 0;
861 /* Nonzero if chip supports Thumb 2. */
862 int arm_arch_thumb2;
864 /* Nonzero if chip supports integer division instruction. */
865 int arm_arch_arm_hwdiv;
866 int arm_arch_thumb_hwdiv;
 868 /* Nonzero if we should use Neon to handle 64-bit operations rather
 869    than core registers. */
870 int prefer_neon_for_64bits = 0;
872 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
873 we must report the mode of the memory reference from
874 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
875 enum machine_mode output_memory_reference_mode;
877 /* The register number to be used for the PIC offset register. */
878 unsigned arm_pic_register = INVALID_REGNUM;
880 /* Set to 1 after arm_reorg has started. Reset to start at the start of
881 the next function. */
882 static int after_arm_reorg = 0;
884 enum arm_pcs arm_pcs_default;
886 /* For an explanation of these variables, see final_prescan_insn below. */
887 int arm_ccfsm_state;
888 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
889 enum arm_cond_code arm_current_cc;
891 rtx arm_target_insn;
892 int arm_target_label;
893 /* The number of conditionally executed insns, including the current insn. */
894 int arm_condexec_count = 0;
895 /* A bitmask specifying the patterns for the IT block.
896 Zero means do not output an IT block before this insn. */
897 int arm_condexec_mask = 0;
898 /* The number of bits used in arm_condexec_mask. */
899 int arm_condexec_masklen = 0;
901 /* The condition codes of the ARM, and the inverse function. */
 902 static const char * const arm_condition_codes[] =
 903 {
 904   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 905   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 906 };
908 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 909 int arm_regs_in_sequence[] =
 910 {
 911   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 912 };
914 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
915 #define streq(string1, string2) (strcmp (string1, string2) == 0)
917 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
918 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
919 | (1 << PIC_OFFSET_TABLE_REGNUM)))
921 /* Initialization code. */
 923 struct processors
 924 {
 925   const char *const name;
 926   enum processor_type core;
 927   const char *arch;
 928   enum base_architecture base_arch;
 929   const unsigned long flags;
 930   const struct tune_params *const tune;
 931 };
934 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
935 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
936 prefetch_slots, \
937 l1_size, \
938 l1_line_size
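/* For reference, the two macros above simply expand to the three
   prefetch-related tuning fields: ARM_PREFETCH_NOT_BENEFICIAL yields
   "0, -1, -1", and e.g. ARM_PREFETCH_BENEFICIAL (4, 32, 32) yields
   "4, 32, 32" (prefetch slots, L1 cache size, L1 cache line size).  */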
940 /* arm generic vectorizer costs. */
941 static const
942 struct cpu_vec_costs arm_default_vec_cost = {
943 1, /* scalar_stmt_cost. */
944 1, /* scalar load_cost. */
945 1, /* scalar_store_cost. */
946 1, /* vec_stmt_cost. */
947 1, /* vec_to_scalar_cost. */
948 1, /* scalar_to_vec_cost. */
949 1, /* vec_align_load_cost. */
950 1, /* vec_unalign_load_cost. */
951 1, /* vec_unalign_store_cost. */
952 1, /* vec_store_cost. */
953 3, /* cond_taken_branch_cost. */
 954   1, /* cond_not_taken_branch_cost. */
 955 };
957 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
958 #include "aarch-cost-tables.h"
962 const struct cpu_cost_table cortexa9_extra_costs =
964 /* ALU */
966 0, /* Arith. */
967 0, /* Logical. */
968 0, /* Shift. */
969 COSTS_N_INSNS (1), /* Shift_reg. */
970 COSTS_N_INSNS (1), /* Arith_shift. */
971 COSTS_N_INSNS (2), /* Arith_shift_reg. */
972 0, /* Log_shift. */
973 COSTS_N_INSNS (1), /* Log_shift_reg. */
974 COSTS_N_INSNS (1), /* Extend. */
975 COSTS_N_INSNS (2), /* Extend_arith. */
976 COSTS_N_INSNS (1), /* Bfi. */
977 COSTS_N_INSNS (1), /* Bfx. */
978 0, /* Clz. */
979 0, /* non_exec. */
980 true /* non_exec_costs_exec. */
983 /* MULT SImode */
985 COSTS_N_INSNS (3), /* Simple. */
986 COSTS_N_INSNS (3), /* Flag_setting. */
987 COSTS_N_INSNS (2), /* Extend. */
988 COSTS_N_INSNS (3), /* Add. */
989 COSTS_N_INSNS (2), /* Extend_add. */
990 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
992 /* MULT DImode */
994 0, /* Simple (N/A). */
995 0, /* Flag_setting (N/A). */
996 COSTS_N_INSNS (4), /* Extend. */
997 0, /* Add (N/A). */
998 COSTS_N_INSNS (4), /* Extend_add. */
999 0 /* Idiv (N/A). */
1002 /* LD/ST */
1004 COSTS_N_INSNS (2), /* Load. */
1005 COSTS_N_INSNS (2), /* Load_sign_extend. */
1006 COSTS_N_INSNS (2), /* Ldrd. */
1007 COSTS_N_INSNS (2), /* Ldm_1st. */
1008 1, /* Ldm_regs_per_insn_1st. */
1009 2, /* Ldm_regs_per_insn_subsequent. */
1010 COSTS_N_INSNS (5), /* Loadf. */
1011 COSTS_N_INSNS (5), /* Loadd. */
1012 COSTS_N_INSNS (1), /* Load_unaligned. */
1013 COSTS_N_INSNS (2), /* Store. */
1014 COSTS_N_INSNS (2), /* Strd. */
1015 COSTS_N_INSNS (2), /* Stm_1st. */
1016 1, /* Stm_regs_per_insn_1st. */
1017 2, /* Stm_regs_per_insn_subsequent. */
1018 COSTS_N_INSNS (1), /* Storef. */
1019 COSTS_N_INSNS (1), /* Stored. */
1020 COSTS_N_INSNS (1) /* Store_unaligned. */
1023 /* FP SFmode */
1025 COSTS_N_INSNS (14), /* Div. */
1026 COSTS_N_INSNS (4), /* Mult. */
1027 COSTS_N_INSNS (7), /* Mult_addsub. */
1028 COSTS_N_INSNS (30), /* Fma. */
1029 COSTS_N_INSNS (3), /* Addsub. */
1030 COSTS_N_INSNS (1), /* Fpconst. */
1031 COSTS_N_INSNS (1), /* Neg. */
1032 COSTS_N_INSNS (3), /* Compare. */
1033 COSTS_N_INSNS (3), /* Widen. */
1034 COSTS_N_INSNS (3), /* Narrow. */
1035 COSTS_N_INSNS (3), /* Toint. */
1036 COSTS_N_INSNS (3), /* Fromint. */
1037 COSTS_N_INSNS (3) /* Roundint. */
1039 /* FP DFmode */
1041 COSTS_N_INSNS (24), /* Div. */
1042 COSTS_N_INSNS (5), /* Mult. */
1043 COSTS_N_INSNS (8), /* Mult_addsub. */
1044 COSTS_N_INSNS (30), /* Fma. */
1045 COSTS_N_INSNS (3), /* Addsub. */
1046 COSTS_N_INSNS (1), /* Fpconst. */
1047 COSTS_N_INSNS (1), /* Neg. */
1048 COSTS_N_INSNS (3), /* Compare. */
1049 COSTS_N_INSNS (3), /* Widen. */
1050 COSTS_N_INSNS (3), /* Narrow. */
1051 COSTS_N_INSNS (3), /* Toint. */
1052 COSTS_N_INSNS (3), /* Fromint. */
1053 COSTS_N_INSNS (3) /* Roundint. */
1056 /* Vector */
1058 COSTS_N_INSNS (1) /* Alu. */
1063 const struct cpu_cost_table cortexa7_extra_costs =
1065 /* ALU */
1067 0, /* Arith. */
1068 0, /* Logical. */
1069 COSTS_N_INSNS (1), /* Shift. */
1070 COSTS_N_INSNS (1), /* Shift_reg. */
1071 COSTS_N_INSNS (1), /* Arith_shift. */
1072 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1073 COSTS_N_INSNS (1), /* Log_shift. */
1074 COSTS_N_INSNS (1), /* Log_shift_reg. */
1075 COSTS_N_INSNS (1), /* Extend. */
1076 COSTS_N_INSNS (1), /* Extend_arith. */
1077 COSTS_N_INSNS (1), /* Bfi. */
1078 COSTS_N_INSNS (1), /* Bfx. */
1079 COSTS_N_INSNS (1), /* Clz. */
1080 0, /* non_exec. */
1081 true /* non_exec_costs_exec. */
1085 /* MULT SImode */
1087 0, /* Simple. */
1088 COSTS_N_INSNS (1), /* Flag_setting. */
1089 COSTS_N_INSNS (1), /* Extend. */
1090 COSTS_N_INSNS (1), /* Add. */
1091 COSTS_N_INSNS (1), /* Extend_add. */
1092 COSTS_N_INSNS (7) /* Idiv. */
1094 /* MULT DImode */
1096 0, /* Simple (N/A). */
1097 0, /* Flag_setting (N/A). */
1098 COSTS_N_INSNS (1), /* Extend. */
1099 0, /* Add. */
1100 COSTS_N_INSNS (2), /* Extend_add. */
1101 0 /* Idiv (N/A). */
1104 /* LD/ST */
1106 COSTS_N_INSNS (1), /* Load. */
1107 COSTS_N_INSNS (1), /* Load_sign_extend. */
1108 COSTS_N_INSNS (3), /* Ldrd. */
1109 COSTS_N_INSNS (1), /* Ldm_1st. */
1110 1, /* Ldm_regs_per_insn_1st. */
1111 2, /* Ldm_regs_per_insn_subsequent. */
1112 COSTS_N_INSNS (2), /* Loadf. */
1113 COSTS_N_INSNS (2), /* Loadd. */
1114 COSTS_N_INSNS (1), /* Load_unaligned. */
1115 COSTS_N_INSNS (1), /* Store. */
1116 COSTS_N_INSNS (3), /* Strd. */
1117 COSTS_N_INSNS (1), /* Stm_1st. */
1118 1, /* Stm_regs_per_insn_1st. */
1119 2, /* Stm_regs_per_insn_subsequent. */
1120 COSTS_N_INSNS (2), /* Storef. */
1121 COSTS_N_INSNS (2), /* Stored. */
1122 COSTS_N_INSNS (1) /* Store_unaligned. */
1125 /* FP SFmode */
1127 COSTS_N_INSNS (15), /* Div. */
1128 COSTS_N_INSNS (3), /* Mult. */
1129 COSTS_N_INSNS (7), /* Mult_addsub. */
1130 COSTS_N_INSNS (7), /* Fma. */
1131 COSTS_N_INSNS (3), /* Addsub. */
1132 COSTS_N_INSNS (3), /* Fpconst. */
1133 COSTS_N_INSNS (3), /* Neg. */
1134 COSTS_N_INSNS (3), /* Compare. */
1135 COSTS_N_INSNS (3), /* Widen. */
1136 COSTS_N_INSNS (3), /* Narrow. */
1137 COSTS_N_INSNS (3), /* Toint. */
1138 COSTS_N_INSNS (3), /* Fromint. */
1139 COSTS_N_INSNS (3) /* Roundint. */
1141 /* FP DFmode */
1143 COSTS_N_INSNS (30), /* Div. */
1144 COSTS_N_INSNS (6), /* Mult. */
1145 COSTS_N_INSNS (10), /* Mult_addsub. */
1146 COSTS_N_INSNS (7), /* Fma. */
1147 COSTS_N_INSNS (3), /* Addsub. */
1148 COSTS_N_INSNS (3), /* Fpconst. */
1149 COSTS_N_INSNS (3), /* Neg. */
1150 COSTS_N_INSNS (3), /* Compare. */
1151 COSTS_N_INSNS (3), /* Widen. */
1152 COSTS_N_INSNS (3), /* Narrow. */
1153 COSTS_N_INSNS (3), /* Toint. */
1154 COSTS_N_INSNS (3), /* Fromint. */
1155 COSTS_N_INSNS (3) /* Roundint. */
1158 /* Vector */
1160 COSTS_N_INSNS (1) /* Alu. */
1164 const struct cpu_cost_table cortexa15_extra_costs =
1166 /* ALU */
1168 COSTS_N_INSNS (1), /* Arith. */
1169 COSTS_N_INSNS (1), /* Logical. */
1170 COSTS_N_INSNS (1), /* Shift. */
1171 COSTS_N_INSNS (1), /* Shift_reg. */
1172 COSTS_N_INSNS (1), /* Arith_shift. */
1173 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1174 COSTS_N_INSNS (1), /* Log_shift. */
1175 COSTS_N_INSNS (1), /* Log_shift_reg. */
1176 COSTS_N_INSNS (1), /* Extend. */
1177 COSTS_N_INSNS (2), /* Extend_arith. */
1178 COSTS_N_INSNS (2), /* Bfi. */
1179 COSTS_N_INSNS (1), /* Bfx. */
1180 COSTS_N_INSNS (1), /* Clz. */
1181 COSTS_N_INSNS (1), /* non_exec. */
1182 true /* non_exec_costs_exec. */
1184 /* MULT SImode */
1187 COSTS_N_INSNS (3), /* Simple. */
1188 COSTS_N_INSNS (4), /* Flag_setting. */
1189 COSTS_N_INSNS (3), /* Extend. */
1190 COSTS_N_INSNS (4), /* Add. */
1191 COSTS_N_INSNS (4), /* Extend_add. */
1192 COSTS_N_INSNS (19) /* Idiv. */
1194 /* MULT DImode */
1196 0, /* Simple (N/A). */
1197 0, /* Flag_setting (N/A). */
1198 COSTS_N_INSNS (4), /* Extend. */
1199 0, /* Add (N/A). */
1200 COSTS_N_INSNS (6), /* Extend_add. */
1201 0 /* Idiv (N/A). */
1204 /* LD/ST */
1206 COSTS_N_INSNS (4), /* Load. */
1207 COSTS_N_INSNS (4), /* Load_sign_extend. */
1208 COSTS_N_INSNS (4), /* Ldrd. */
1209 COSTS_N_INSNS (5), /* Ldm_1st. */
1210 1, /* Ldm_regs_per_insn_1st. */
1211 2, /* Ldm_regs_per_insn_subsequent. */
1212 COSTS_N_INSNS (5), /* Loadf. */
1213 COSTS_N_INSNS (5), /* Loadd. */
1214 COSTS_N_INSNS (1), /* Load_unaligned. */
1215 COSTS_N_INSNS (1), /* Store. */
1216 COSTS_N_INSNS (1), /* Strd. */
1217 COSTS_N_INSNS (2), /* Stm_1st. */
1218 1, /* Stm_regs_per_insn_1st. */
1219 2, /* Stm_regs_per_insn_subsequent. */
1220 COSTS_N_INSNS (1), /* Storef. */
1221 COSTS_N_INSNS (1), /* Stored. */
1222 COSTS_N_INSNS (1) /* Store_unaligned. */
1225 /* FP SFmode */
1227 COSTS_N_INSNS (18), /* Div. */
1228 COSTS_N_INSNS (5), /* Mult. */
1229 COSTS_N_INSNS (3), /* Mult_addsub. */
1230 COSTS_N_INSNS (13), /* Fma. */
1231 COSTS_N_INSNS (5), /* Addsub. */
1232 COSTS_N_INSNS (5), /* Fpconst. */
1233 COSTS_N_INSNS (3), /* Neg. */
1234 COSTS_N_INSNS (3), /* Compare. */
1235 COSTS_N_INSNS (3), /* Widen. */
1236 COSTS_N_INSNS (3), /* Narrow. */
1237 COSTS_N_INSNS (3), /* Toint. */
1238 COSTS_N_INSNS (3), /* Fromint. */
1239 COSTS_N_INSNS (3) /* Roundint. */
1241 /* FP DFmode */
1243 COSTS_N_INSNS (32), /* Div. */
1244 COSTS_N_INSNS (5), /* Mult. */
1245 COSTS_N_INSNS (3), /* Mult_addsub. */
1246 COSTS_N_INSNS (13), /* Fma. */
1247 COSTS_N_INSNS (5), /* Addsub. */
1248 COSTS_N_INSNS (3), /* Fpconst. */
1249 COSTS_N_INSNS (3), /* Neg. */
1250 COSTS_N_INSNS (3), /* Compare. */
1251 COSTS_N_INSNS (3), /* Widen. */
1252 COSTS_N_INSNS (3), /* Narrow. */
1253 COSTS_N_INSNS (3), /* Toint. */
1254 COSTS_N_INSNS (3), /* Fromint. */
1255 COSTS_N_INSNS (3) /* Roundint. */
1258 /* Vector */
1260 COSTS_N_INSNS (1) /* Alu. */
1264 const struct tune_params arm_slowmul_tune =
1266 arm_slowmul_rtx_costs,
1267 NULL,
1268 NULL, /* Sched adj cost. */
1269 3, /* Constant limit. */
1270 5, /* Max cond insns. */
1271 ARM_PREFETCH_NOT_BENEFICIAL,
1272 true, /* Prefer constant pool. */
1273 arm_default_branch_cost,
1274 false, /* Prefer LDRD/STRD. */
1275 {true, true}, /* Prefer non short circuit. */
1276 &arm_default_vec_cost, /* Vectorizer costs. */
1277 false /* Prefer Neon for 64-bits bitops. */
1280 const struct tune_params arm_fastmul_tune =
1282 arm_fastmul_rtx_costs,
1283 NULL,
1284 NULL, /* Sched adj cost. */
1285 1, /* Constant limit. */
1286 5, /* Max cond insns. */
1287 ARM_PREFETCH_NOT_BENEFICIAL,
1288 true, /* Prefer constant pool. */
1289 arm_default_branch_cost,
1290 false, /* Prefer LDRD/STRD. */
1291 {true, true}, /* Prefer non short circuit. */
1292 &arm_default_vec_cost, /* Vectorizer costs. */
1293 false /* Prefer Neon for 64-bits bitops. */
1296 /* StrongARM has early execution of branches, so a sequence that is worth
1297 skipping is shorter. Set max_insns_skipped to a lower value. */
1299 const struct tune_params arm_strongarm_tune =
1301 arm_fastmul_rtx_costs,
1302 NULL,
1303 NULL, /* Sched adj cost. */
1304 1, /* Constant limit. */
1305 3, /* Max cond insns. */
1306 ARM_PREFETCH_NOT_BENEFICIAL,
1307 true, /* Prefer constant pool. */
1308 arm_default_branch_cost,
1309 false, /* Prefer LDRD/STRD. */
1310 {true, true}, /* Prefer non short circuit. */
1311 &arm_default_vec_cost, /* Vectorizer costs. */
1312 false /* Prefer Neon for 64-bits bitops. */
1315 const struct tune_params arm_xscale_tune =
1317 arm_xscale_rtx_costs,
1318 NULL,
1319 xscale_sched_adjust_cost,
1320 2, /* Constant limit. */
1321 3, /* Max cond insns. */
1322 ARM_PREFETCH_NOT_BENEFICIAL,
1323 true, /* Prefer constant pool. */
1324 arm_default_branch_cost,
1325 false, /* Prefer LDRD/STRD. */
1326 {true, true}, /* Prefer non short circuit. */
1327 &arm_default_vec_cost, /* Vectorizer costs. */
1328 false /* Prefer Neon for 64-bits bitops. */
1331 const struct tune_params arm_9e_tune =
1333 arm_9e_rtx_costs,
1334 NULL,
1335 NULL, /* Sched adj cost. */
1336 1, /* Constant limit. */
1337 5, /* Max cond insns. */
1338 ARM_PREFETCH_NOT_BENEFICIAL,
1339 true, /* Prefer constant pool. */
1340 arm_default_branch_cost,
1341 false, /* Prefer LDRD/STRD. */
1342 {true, true}, /* Prefer non short circuit. */
1343 &arm_default_vec_cost, /* Vectorizer costs. */
1344 false /* Prefer Neon for 64-bits bitops. */
1347 const struct tune_params arm_v6t2_tune =
1349 arm_9e_rtx_costs,
1350 NULL,
1351 NULL, /* Sched adj cost. */
1352 1, /* Constant limit. */
1353 5, /* Max cond insns. */
1354 ARM_PREFETCH_NOT_BENEFICIAL,
1355 false, /* Prefer constant pool. */
1356 arm_default_branch_cost,
1357 false, /* Prefer LDRD/STRD. */
1358 {true, true}, /* Prefer non short circuit. */
1359 &arm_default_vec_cost, /* Vectorizer costs. */
1360 false /* Prefer Neon for 64-bits bitops. */
1363 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1364 const struct tune_params arm_cortex_tune =
1366 arm_9e_rtx_costs,
1367 &generic_extra_costs,
1368 NULL, /* Sched adj cost. */
1369 1, /* Constant limit. */
1370 5, /* Max cond insns. */
1371 ARM_PREFETCH_NOT_BENEFICIAL,
1372 false, /* Prefer constant pool. */
1373 arm_default_branch_cost,
1374 false, /* Prefer LDRD/STRD. */
1375 {true, true}, /* Prefer non short circuit. */
1376 &arm_default_vec_cost, /* Vectorizer costs. */
1377 false /* Prefer Neon for 64-bits bitops. */
1380 const struct tune_params arm_cortex_a7_tune =
1382 arm_9e_rtx_costs,
1383 &cortexa7_extra_costs,
1384 NULL,
1385 1, /* Constant limit. */
1386 5, /* Max cond insns. */
1387 ARM_PREFETCH_NOT_BENEFICIAL,
1388 false, /* Prefer constant pool. */
1389 arm_default_branch_cost,
1390 false, /* Prefer LDRD/STRD. */
1391 {true, true}, /* Prefer non short circuit. */
1392 &arm_default_vec_cost, /* Vectorizer costs. */
1393 false /* Prefer Neon for 64-bits bitops. */
1396 const struct tune_params arm_cortex_a15_tune =
1398 arm_9e_rtx_costs,
1399 &cortexa15_extra_costs,
1400 NULL, /* Sched adj cost. */
1401 1, /* Constant limit. */
1402 2, /* Max cond insns. */
1403 ARM_PREFETCH_NOT_BENEFICIAL,
1404 false, /* Prefer constant pool. */
1405 arm_default_branch_cost,
1406 true, /* Prefer LDRD/STRD. */
1407 {true, true}, /* Prefer non short circuit. */
1408 &arm_default_vec_cost, /* Vectorizer costs. */
1409 false /* Prefer Neon for 64-bits bitops. */
1412 const struct tune_params arm_cortex_a53_tune =
1414 arm_9e_rtx_costs,
1415 &cortexa53_extra_costs,
1416 NULL, /* Scheduler cost adjustment. */
1417 1, /* Constant limit. */
1418 5, /* Max cond insns. */
1419 ARM_PREFETCH_NOT_BENEFICIAL,
1420 false, /* Prefer constant pool. */
1421 arm_default_branch_cost,
1422 false, /* Prefer LDRD/STRD. */
1423 {true, true}, /* Prefer non short circuit. */
1424 &arm_default_vec_cost, /* Vectorizer costs. */
1425 false /* Prefer Neon for 64-bits bitops. */
1428 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1429 less appealing. Set max_insns_skipped to a low value. */
1431 const struct tune_params arm_cortex_a5_tune =
1433 arm_9e_rtx_costs,
1434 NULL,
1435 NULL, /* Sched adj cost. */
1436 1, /* Constant limit. */
1437 1, /* Max cond insns. */
1438 ARM_PREFETCH_NOT_BENEFICIAL,
1439 false, /* Prefer constant pool. */
1440 arm_cortex_a5_branch_cost,
1441 false, /* Prefer LDRD/STRD. */
1442 {false, false}, /* Prefer non short circuit. */
1443 &arm_default_vec_cost, /* Vectorizer costs. */
1444 false /* Prefer Neon for 64-bits bitops. */
1447 const struct tune_params arm_cortex_a9_tune =
1449 arm_9e_rtx_costs,
1450 &cortexa9_extra_costs,
1451 cortex_a9_sched_adjust_cost,
1452 1, /* Constant limit. */
1453 5, /* Max cond insns. */
1454 ARM_PREFETCH_BENEFICIAL(4,32,32),
1455 false, /* Prefer constant pool. */
1456 arm_default_branch_cost,
1457 false, /* Prefer LDRD/STRD. */
1458 {true, true}, /* Prefer non short circuit. */
1459 &arm_default_vec_cost, /* Vectorizer costs. */
1460 false /* Prefer Neon for 64-bits bitops. */
1463 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1464 cycle to execute each. An LDR from the constant pool also takes two cycles
1465 to execute, but mildly increases pipelining opportunity (consecutive
1466 loads/stores can be pipelined together, saving one cycle), and may also
1467 improve icache utilisation. Hence we prefer the constant pool for such
1468 processors. */
1470 const struct tune_params arm_v7m_tune =
1472 arm_9e_rtx_costs,
1473 &generic_extra_costs,
1474 NULL, /* Sched adj cost. */
1475 1, /* Constant limit. */
1476 5, /* Max cond insns. */
1477 ARM_PREFETCH_NOT_BENEFICIAL,
1478 true, /* Prefer constant pool. */
1479 arm_cortex_m_branch_cost,
1480 false, /* Prefer LDRD/STRD. */
1481 {false, false}, /* Prefer non short circuit. */
1482 &arm_default_vec_cost, /* Vectorizer costs. */
1483 false /* Prefer Neon for 64-bits bitops. */
1486 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1487 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1488 const struct tune_params arm_v6m_tune =
1490 arm_9e_rtx_costs,
1491 NULL,
1492 NULL, /* Sched adj cost. */
1493 1, /* Constant limit. */
1494 5, /* Max cond insns. */
1495 ARM_PREFETCH_NOT_BENEFICIAL,
1496 false, /* Prefer constant pool. */
1497 arm_default_branch_cost,
1498 false, /* Prefer LDRD/STRD. */
1499 {false, false}, /* Prefer non short circuit. */
1500 &arm_default_vec_cost, /* Vectorizer costs. */
1501 false /* Prefer Neon for 64-bits bitops. */
1504 const struct tune_params arm_fa726te_tune =
1506 arm_9e_rtx_costs,
1507 NULL,
1508 fa726te_sched_adjust_cost,
1509 1, /* Constant limit. */
1510 5, /* Max cond insns. */
1511 ARM_PREFETCH_NOT_BENEFICIAL,
1512 true, /* Prefer constant pool. */
1513 arm_default_branch_cost,
1514 false, /* Prefer LDRD/STRD. */
1515 {true, true}, /* Prefer non short circuit. */
1516 &arm_default_vec_cost, /* Vectorizer costs. */
1517 false /* Prefer Neon for 64-bits bitops. */
1521 /* Not all of these give usefully different compilation alternatives,
1522 but there is no simple way of generalizing them. */
 1523 static const struct processors all_cores[] =
 1524 {
 1525   /* ARM Cores */
 1526 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
 1527   {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
 1528     FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
 1529 #include "arm-cores.def"
 1530 #undef ARM_CORE
 1531   {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
 1532 };
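/* As an illustration of the ARM_CORE expansion above, a hypothetical
   arm-cores.def entry
     ARM_CORE ("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand to
     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
       FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},
   tying a command-line CPU name to its architecture flags and tuning.  */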
 1534 static const struct processors all_architectures[] =
 1535 {
 1536   /* ARM Architectures */
 1537   /* We don't specify tuning costs here as it will be figured out
 1538      from the core. */
 1540 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
 1541   {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
 1542 #include "arm-arches.def"
 1543 #undef ARM_ARCH
 1544   {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
 1545 };
1548 /* These are populated as commandline arguments are processed, or NULL
1549 if not specified. */
1550 static const struct processors *arm_selected_arch;
1551 static const struct processors *arm_selected_cpu;
1552 static const struct processors *arm_selected_tune;
1554 /* The name of the preprocessor macro to define for this architecture. */
1556 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1558 /* Available values for -mfpu=. */
 1560 static const struct arm_fpu_desc all_fpus[] =
 1561 {
 1562 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
 1563   { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
 1564 #include "arm-fpus.def"
 1565 #undef ARM_FPU
 1566 };
1569 /* Supported TLS relocations. */
1571 enum tls_reloc {
1572 TLS_GD32,
1573 TLS_LDM32,
1574 TLS_LDO32,
1575 TLS_IE32,
1576 TLS_LE32,
 1577   TLS_DESCSEQ /* GNU scheme */
 1578 };
1580 /* The maximum number of insns to be used when loading a constant. */
 1581 inline static int
 1582 arm_constant_limit (bool size_p)
 1583 {
 1584   return size_p ? 1 : current_tune->constant_limit;
 1585 }
1587 /* Emit an insn that's a simple single-set. Both the operands must be known
1588 to be valid. */
 1589 inline static rtx
 1590 emit_set_insn (rtx x, rtx y)
 1591 {
 1592   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 1593 }
1595 /* Return the number of bits set in VALUE. */
 1596 static unsigned
 1597 bit_count (unsigned long value)
 1598 {
 1599   unsigned long count = 0;
 1601   while (value)
 1602     {
 1603       count++;
 1604       value &= value - 1; /* Clear the least-significant set bit. */
 1605     }
 1607   return count;
 1608 }
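/* Worked example for the loop above: with value = 0x28 (binary 101000),
   the first iteration computes 0x28 & 0x27 = 0x20 and the second
   0x20 & 0x1f = 0, so bit_count returns 2; the loop runs once per set bit,
   because value &= value - 1 clears exactly the least-significant set bit.  */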
 1610 typedef struct
 1611 {
 1612   enum machine_mode mode;
1613 const char *name;
1614 } arm_fixed_mode_set;
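/* Illustrative (hypothetical) entries of this type, pairing a fixed-point
   mode with the suffix used when composing __gnu_* libfunc names:
     { QQmode, "qq" }, { SAmode, "sa" }, { USAmode, "usa" }  */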
1616 /* A small helper for setting fixed-point library libfuncs. */
1618 static void
1619 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1620 const char *funcname, const char *modename,
1621 int num_suffix)
1623 char buffer[50];
1625 if (num_suffix == 0)
1626 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1627 else
1628 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1630 set_optab_libfunc (optable, mode, buffer);
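/* Illustration of the name built above: a call such as
   arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3)
   registers the libfunc "__gnu_ssaddsa3"; with num_suffix == 0 the
   trailing digit is simply omitted.  */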
1633 static void
1634 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1635 enum machine_mode from, const char *funcname,
1636 const char *toname, const char *fromname)
1638 char buffer[50];
1639 const char *maybe_suffix_2 = "";
1641 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1642 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1643 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1644 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1645 maybe_suffix_2 = "2";
1647 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1648 maybe_suffix_2);
1650 set_conv_libfunc (optable, to, from, buffer);
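/* Illustration of the naming scheme above: converting from SQmode to
   DQmode with "fract" produces "__gnu_fractsqdq2" (the "2" suffix is
   added because both modes are signed fract modes), whereas a
   conversion from SImode, which is not a fixed-point mode, gets no
   suffix, e.g. "__gnu_fractsisa".  */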
1653 /* Set up library functions unique to ARM. */
1655 static void
1656 arm_init_libfuncs (void)
1658 /* For Linux, we have access to kernel support for atomic operations. */
1659 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1660 init_sync_libfuncs (2 * UNITS_PER_WORD);
1662 /* There are no special library functions unless we are using the
1663 ARM BPABI. */
1664 if (!TARGET_BPABI)
1665 return;
1667 /* The functions below are described in Section 4 of the "Run-Time
1668 ABI for the ARM architecture", Version 1.0. */
1670 /* Double-precision floating-point arithmetic. Table 2. */
1671 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1672 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1673 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1674 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1675 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
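/* Illustration (not specific to this file): on a soft-float BPABI target a
   statement such as "double c = a + b;" compiles to a call to __aeabi_dadd,
   with A passed in {r0, r1}, B in {r2, r3} and the result returned in
   {r0, r1}, since the RTABI helpers use the base (integer) AAPCS calling
   convention.  */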
1677 /* Double-precision comparisons. Table 3. */
1678 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1679 set_optab_libfunc (ne_optab, DFmode, NULL);
1680 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1681 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1682 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1683 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1684 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1686 /* Single-precision floating-point arithmetic. Table 4. */
1687 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1688 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1689 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1690 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1691 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1693 /* Single-precision comparisons. Table 5. */
1694 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1695 set_optab_libfunc (ne_optab, SFmode, NULL);
1696 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1697 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1698 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1699 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1700 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1702 /* Floating-point to integer conversions. Table 6. */
1703 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1704 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1705 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1706 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1707 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1708 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1709 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1710 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1712 /* Conversions between floating types. Table 7. */
1713 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1714 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1716 /* Integer to floating-point conversions. Table 8. */
1717 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1718 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1719 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1720 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1721 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1722 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1723 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1724 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1726 /* Long long. Table 9. */
1727 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1728 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1729 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1730 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1731 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1732 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1733 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1734 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1736 /* Integer (32/32->32) division. \S 4.3.1. */
1737 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1738 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1740 /* The divmod functions are designed so that they can be used for
1741 plain division, even though they return both the quotient and the
1742 remainder. The quotient is returned in the usual location (i.e.,
1743 r0 for SImode, {r0, r1} for DImode), just as would be expected
1744 for an ordinary division routine. Because the AAPCS calling
1745 conventions specify that all of { r0, r1, r2, r3 } are
1746 call-clobbered registers, there is no need to tell the compiler
1747 explicitly that those registers are clobbered by these
1748 routines. */
1749 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1750 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
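/* Illustration: because of the mapping above, a plain 64-bit division such
   as "q = a / b" can call __aeabi_ldivmod and simply ignore the remainder
   left in {r2, r3}; the quotient arrives in {r0, r1}, exactly where a
   dedicated division routine would put it.  */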
1752 /* For SImode division the ABI provides div-without-mod routines,
1753 which are faster. */
1754 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1755 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1757 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1758 divmod libcalls instead. */
1759 set_optab_libfunc (smod_optab, DImode, NULL);
1760 set_optab_libfunc (umod_optab, DImode, NULL);
1761 set_optab_libfunc (smod_optab, SImode, NULL);
1762 set_optab_libfunc (umod_optab, SImode, NULL);
1764 /* Half-precision float operations. The compiler handles all operations
1765 with NULL libfuncs by converting to SFmode. */
1766 switch (arm_fp16_format)
1768 case ARM_FP16_FORMAT_IEEE:
1769 case ARM_FP16_FORMAT_ALTERNATIVE:
1771 /* Conversions. */
1772 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1773 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1774 ? "__gnu_f2h_ieee"
1775 : "__gnu_f2h_alternative"));
1776 set_conv_libfunc (sext_optab, SFmode, HFmode,
1777 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1778 ? "__gnu_h2f_ieee"
1779 : "__gnu_h2f_alternative"));
1781 /* Arithmetic. */
1782 set_optab_libfunc (add_optab, HFmode, NULL);
1783 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1784 set_optab_libfunc (smul_optab, HFmode, NULL);
1785 set_optab_libfunc (neg_optab, HFmode, NULL);
1786 set_optab_libfunc (sub_optab, HFmode, NULL);
1788 /* Comparisons. */
1789 set_optab_libfunc (eq_optab, HFmode, NULL);
1790 set_optab_libfunc (ne_optab, HFmode, NULL);
1791 set_optab_libfunc (lt_optab, HFmode, NULL);
1792 set_optab_libfunc (le_optab, HFmode, NULL);
1793 set_optab_libfunc (ge_optab, HFmode, NULL);
1794 set_optab_libfunc (gt_optab, HFmode, NULL);
1795 set_optab_libfunc (unord_optab, HFmode, NULL);
1796 break;
1798 default:
1799 break;
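/* Illustration: with the arithmetic and comparison optabs nulled out above,
   code such as "__fp16 x, y; float z = x + y;" is compiled by widening x
   and y through __gnu_h2f_ieee (or the _alternative variant), doing the
   addition in SFmode, and only narrowing back through __gnu_f2h_ieee when
   a __fp16 result is actually stored.  */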
1802 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1804 const arm_fixed_mode_set fixed_arith_modes[] =
1806 { QQmode, "qq" },
1807 { UQQmode, "uqq" },
1808 { HQmode, "hq" },
1809 { UHQmode, "uhq" },
1810 { SQmode, "sq" },
1811 { USQmode, "usq" },
1812 { DQmode, "dq" },
1813 { UDQmode, "udq" },
1814 { TQmode, "tq" },
1815 { UTQmode, "utq" },
1816 { HAmode, "ha" },
1817 { UHAmode, "uha" },
1818 { SAmode, "sa" },
1819 { USAmode, "usa" },
1820 { DAmode, "da" },
1821 { UDAmode, "uda" },
1822 { TAmode, "ta" },
1823 { UTAmode, "uta" }
1825 const arm_fixed_mode_set fixed_conv_modes[] =
1827 { QQmode, "qq" },
1828 { UQQmode, "uqq" },
1829 { HQmode, "hq" },
1830 { UHQmode, "uhq" },
1831 { SQmode, "sq" },
1832 { USQmode, "usq" },
1833 { DQmode, "dq" },
1834 { UDQmode, "udq" },
1835 { TQmode, "tq" },
1836 { UTQmode, "utq" },
1837 { HAmode, "ha" },
1838 { UHAmode, "uha" },
1839 { SAmode, "sa" },
1840 { USAmode, "usa" },
1841 { DAmode, "da" },
1842 { UDAmode, "uda" },
1843 { TAmode, "ta" },
1844 { UTAmode, "uta" },
1845 { QImode, "qi" },
1846 { HImode, "hi" },
1847 { SImode, "si" },
1848 { DImode, "di" },
1849 { TImode, "ti" },
1850 { SFmode, "sf" },
1851 { DFmode, "df" }
1853 unsigned int i, j;
1855 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1857 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1858 "add", fixed_arith_modes[i].name, 3);
1859 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1860 "ssadd", fixed_arith_modes[i].name, 3);
1861 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1862 "usadd", fixed_arith_modes[i].name, 3);
1863 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1864 "sub", fixed_arith_modes[i].name, 3);
1865 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1866 "sssub", fixed_arith_modes[i].name, 3);
1867 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1868 "ussub", fixed_arith_modes[i].name, 3);
1869 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1870 "mul", fixed_arith_modes[i].name, 3);
1871 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1872 "ssmul", fixed_arith_modes[i].name, 3);
1873 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1874 "usmul", fixed_arith_modes[i].name, 3);
1875 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1876 "div", fixed_arith_modes[i].name, 3);
1877 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1878 "udiv", fixed_arith_modes[i].name, 3);
1879 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1880 "ssdiv", fixed_arith_modes[i].name, 3);
1881 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1882 "usdiv", fixed_arith_modes[i].name, 3);
1883 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1884 "neg", fixed_arith_modes[i].name, 2);
1885 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1886 "ssneg", fixed_arith_modes[i].name, 2);
1887 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1888 "usneg", fixed_arith_modes[i].name, 2);
1889 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1890 "ashl", fixed_arith_modes[i].name, 3);
1891 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1892 "ashr", fixed_arith_modes[i].name, 3);
1893 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1894 "lshr", fixed_arith_modes[i].name, 3);
1895 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1896 "ssashl", fixed_arith_modes[i].name, 3);
1897 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1898 "usashl", fixed_arith_modes[i].name, 3);
1899 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1900 "cmp", fixed_arith_modes[i].name, 2);
1903 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1904 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1906 if (i == j
1907 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1908 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1909 continue;
1911 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1912 fixed_conv_modes[j].mode, "fract",
1913 fixed_conv_modes[i].name,
1914 fixed_conv_modes[j].name);
1915 arm_set_fixed_conv_libfunc (satfract_optab,
1916 fixed_conv_modes[i].mode,
1917 fixed_conv_modes[j].mode, "satfract",
1918 fixed_conv_modes[i].name,
1919 fixed_conv_modes[j].name);
1920 arm_set_fixed_conv_libfunc (fractuns_optab,
1921 fixed_conv_modes[i].mode,
1922 fixed_conv_modes[j].mode, "fractuns",
1923 fixed_conv_modes[i].name,
1924 fixed_conv_modes[j].name);
1925 arm_set_fixed_conv_libfunc (satfractuns_optab,
1926 fixed_conv_modes[i].mode,
1927 fixed_conv_modes[j].mode, "satfractuns",
1928 fixed_conv_modes[i].name,
1929 fixed_conv_modes[j].name);
1933 if (TARGET_AAPCS_BASED)
1934 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1937 /* On AAPCS systems, this is the "struct __va_list". */
1938 static GTY(()) tree va_list_type;
1940 /* Return the type to use as __builtin_va_list. */
1941 static tree
1942 arm_build_builtin_va_list (void)
1944 tree va_list_name;
1945 tree ap_field;
1947 if (!TARGET_AAPCS_BASED)
1948 return std_build_builtin_va_list ();
1950 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1951 defined as:
1953 struct __va_list
1955 void *__ap;
1958 The C Library ABI further reinforces this definition in \S
1959 4.1.
1961 We must follow this definition exactly. The structure tag
1962 name is visible in C++ mangled names, and thus forms a part
1963 of the ABI. The field name may be used by people who
1964 #include <stdarg.h>. */
1965 /* Create the type. */
1966 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1967 /* Give it the required name. */
1968 va_list_name = build_decl (BUILTINS_LOCATION,
1969 TYPE_DECL,
1970 get_identifier ("__va_list"),
1971 va_list_type);
1972 DECL_ARTIFICIAL (va_list_name) = 1;
1973 TYPE_NAME (va_list_type) = va_list_name;
1974 TYPE_STUB_DECL (va_list_type) = va_list_name;
1975 /* Create the __ap field. */
1976 ap_field = build_decl (BUILTINS_LOCATION,
1977 FIELD_DECL,
1978 get_identifier ("__ap"),
1979 ptr_type_node);
1980 DECL_ARTIFICIAL (ap_field) = 1;
1981 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1982 TYPE_FIELDS (va_list_type) = ap_field;
1983 /* Compute its layout. */
1984 layout_type (va_list_type);
1986 return va_list_type;
1989 /* Return an expression of type "void *" pointing to the next
1990 available argument in a variable-argument list. VALIST is the
1991 user-level va_list object, of type __builtin_va_list. */
1992 static tree
1993 arm_extract_valist_ptr (tree valist)
1995 if (TREE_TYPE (valist) == error_mark_node)
1996 return error_mark_node;
1998 /* On an AAPCS target, the pointer is stored within "struct
1999 va_list". */
2000 if (TARGET_AAPCS_BASED)
2002 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2003 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2004 valist, ap_field, NULL_TREE);
2007 return valist;
2010 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2011 static void
2012 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2014 valist = arm_extract_valist_ptr (valist);
2015 std_expand_builtin_va_start (valist, nextarg);
2018 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2019 static tree
2020 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2021 gimple_seq *post_p)
2023 valist = arm_extract_valist_ptr (valist);
2024 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2027 /* Fix up any incompatible options that the user has specified. */
2028 static void
2029 arm_option_override (void)
2031 if (global_options_set.x_arm_arch_option)
2032 arm_selected_arch = &all_architectures[arm_arch_option];
2034 if (global_options_set.x_arm_cpu_option)
2035 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2037 if (global_options_set.x_arm_tune_option)
2038 arm_selected_tune = &all_cores[(int) arm_tune_option];
2040 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2041 SUBTARGET_OVERRIDE_OPTIONS;
2042 #endif
2044 if (arm_selected_arch)
2046 if (arm_selected_cpu)
2048 /* Check for conflict between mcpu and march. */
2049 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2051 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2052 arm_selected_cpu->name, arm_selected_arch->name);
2053 /* -march wins for code generation.
2054 -mcpu wins for default tuning. */
2055 if (!arm_selected_tune)
2056 arm_selected_tune = arm_selected_cpu;
2058 arm_selected_cpu = arm_selected_arch;
2060 else
2061 /* -mcpu wins. */
2062 arm_selected_arch = NULL;
2064 else
2065 /* Pick a CPU based on the architecture. */
2066 arm_selected_cpu = arm_selected_arch;
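/* Illustration of the interaction above: "-mcpu=cortex-m3 -march=armv7-a"
   (a deliberately conflicting pair) triggers the warning, after which
   armv7-a drives code generation while cortex-m3 still supplies the
   default tuning, matching the "-march wins / -mcpu wins" comment.  */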
2069 /* If the user did not specify a processor, choose one for them. */
2070 if (!arm_selected_cpu)
2072 const struct processors * sel;
2073 unsigned int sought;
2075 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2076 if (!arm_selected_cpu->name)
2078 #ifdef SUBTARGET_CPU_DEFAULT
2079 /* Use the subtarget default CPU if none was specified by
2080 configure. */
2081 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2082 #endif
2083 /* Default to ARM6. */
2084 if (!arm_selected_cpu->name)
2085 arm_selected_cpu = &all_cores[arm6];
2088 sel = arm_selected_cpu;
2089 insn_flags = sel->flags;
2091 /* Now check to see if the user has specified some command line
2092 switches that require certain abilities from the cpu. */
2093 sought = 0;
2095 if (TARGET_INTERWORK || TARGET_THUMB)
2097 sought |= (FL_THUMB | FL_MODE32);
2099 /* There are no ARM processors that support both APCS-26 and
2100 interworking. Therefore we force FL_MODE26 to be removed
2101 from insn_flags here (if it was set), so that the search
2102 below will always be able to find a compatible processor. */
2103 insn_flags &= ~FL_MODE26;
2106 if (sought != 0 && ((sought & insn_flags) != sought))
2108 /* Try to locate a CPU type that supports all of the abilities
2109 of the default CPU, plus the extra abilities requested by
2110 the user. */
2111 for (sel = all_cores; sel->name != NULL; sel++)
2112 if ((sel->flags & sought) == (sought | insn_flags))
2113 break;
2115 if (sel->name == NULL)
2117 unsigned current_bit_count = 0;
2118 const struct processors * best_fit = NULL;
2120 /* Ideally we would like to issue an error message here
2121 saying that it was not possible to find a CPU compatible
2122 with the default CPU, but which also supports the command
2123 line options specified by the programmer, and so they
2124 ought to use the -mcpu=<name> command line option to
2125 override the default CPU type.
2127 If we cannot find a cpu that has both the
2128 characteristics of the default cpu and the given
2129 command line options we scan the array again looking
2130 for a best match. */
2131 for (sel = all_cores; sel->name != NULL; sel++)
2132 if ((sel->flags & sought) == sought)
2134 unsigned count;
2136 count = bit_count (sel->flags & insn_flags);
2138 if (count >= current_bit_count)
2140 best_fit = sel;
2141 current_bit_count = count;
2145 gcc_assert (best_fit);
2146 sel = best_fit;
2149 arm_selected_cpu = sel;
2153 gcc_assert (arm_selected_cpu);
2154 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2155 if (!arm_selected_tune)
2156 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2158 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2159 insn_flags = arm_selected_cpu->flags;
2160 arm_base_arch = arm_selected_cpu->base_arch;
2162 arm_tune = arm_selected_tune->core;
2163 tune_flags = arm_selected_tune->flags;
2164 current_tune = arm_selected_tune->tune;
2166 /* Make sure that the processor choice does not conflict with any of the
2167 other command line choices. */
2168 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2169 error ("target CPU does not support ARM mode");
2171 /* BPABI targets use linker tricks to allow interworking on cores
2172 without thumb support. */
2173 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2175 warning (0, "target CPU does not support interworking" );
2176 target_flags &= ~MASK_INTERWORK;
2179 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2181 warning (0, "target CPU does not support THUMB instructions");
2182 target_flags &= ~MASK_THUMB;
2185 if (TARGET_APCS_FRAME && TARGET_THUMB)
2187 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2188 target_flags &= ~MASK_APCS_FRAME;
2191 /* Callee super interworking implies thumb interworking. Adding
2192 this to the flags here simplifies the logic elsewhere. */
2193 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2194 target_flags |= MASK_INTERWORK;
2196 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2197 from here where no function is being compiled currently. */
2198 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2199 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2201 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2202 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2204 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2206 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2207 target_flags |= MASK_APCS_FRAME;
2210 if (TARGET_POKE_FUNCTION_NAME)
2211 target_flags |= MASK_APCS_FRAME;
2213 if (TARGET_APCS_REENT && flag_pic)
2214 error ("-fpic and -mapcs-reent are incompatible");
2216 if (TARGET_APCS_REENT)
2217 warning (0, "APCS reentrant code not supported. Ignored");
2219 /* If this target is normally configured to use APCS frames, warn if they
2220 are turned off and debugging is turned on. */
2221 if (TARGET_ARM
2222 && write_symbols != NO_DEBUG
2223 && !TARGET_APCS_FRAME
2224 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2225 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2227 if (TARGET_APCS_FLOAT)
2228 warning (0, "passing floating point arguments in fp regs not yet supported");
2230 if (TARGET_LITTLE_WORDS)
2231 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2232 "will be removed in a future release");
2234 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2235 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2236 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2237 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2238 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2239 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2240 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2241 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2242 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2243 arm_arch6m = arm_arch6 && !arm_arch_notm;
2244 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2245 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2246 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2247 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2248 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2250 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2251 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2252 thumb_code = TARGET_ARM == 0;
2253 thumb1_code = TARGET_THUMB1 != 0;
2254 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2255 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2256 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2257 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2258 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2259 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2260 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2261 if (arm_restrict_it == 2)
2262 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2264 if (!TARGET_THUMB2)
2265 arm_restrict_it = 0;
2267 /* If we are not using the default (ARM mode) section anchor offset
2268 ranges, then set the correct ranges now. */
2269 if (TARGET_THUMB1)
2271 /* Thumb-1 LDR instructions cannot have negative offsets.
2272 Permissible positive offset ranges are 5-bit (for byte loads),
2273 6-bit (for halfword loads), or 7-bit (for word loads).
2274 Empirical results suggest a 7-bit anchor range gives the best
2275 overall code size. */
2276 targetm.min_anchor_offset = 0;
2277 targetm.max_anchor_offset = 127;
2279 else if (TARGET_THUMB2)
2281 /* The minimum is set such that the total size of the block
2282 for a particular anchor is 248 + 1 + 4095 bytes, which is
2283 divisible by eight, ensuring natural spacing of anchors. */
2284 targetm.min_anchor_offset = -248;
2285 targetm.max_anchor_offset = 4095;
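/* Worked example for the Thumb-2 range above: the block spans
   248 + 1 + 4095 = 4344 bytes, and 4344 / 8 = 543, so anchors fall on
   naturally spaced 8-byte multiples.  */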
2288 /* V5 code we generate is completely interworking capable, so we turn off
2289 TARGET_INTERWORK here to avoid many tests later on. */
2291 /* XXX However, we must pass the right pre-processor defines to CPP
2292 or GLD can get confused. This is a hack. */
2293 if (TARGET_INTERWORK)
2294 arm_cpp_interwork = 1;
2296 if (arm_arch5)
2297 target_flags &= ~MASK_INTERWORK;
2299 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2300 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2302 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2303 error ("iwmmxt abi requires an iwmmxt capable cpu");
2305 if (!global_options_set.x_arm_fpu_index)
2307 const char *target_fpu_name;
2308 bool ok;
2310 #ifdef FPUTYPE_DEFAULT
2311 target_fpu_name = FPUTYPE_DEFAULT;
2312 #else
2313 target_fpu_name = "vfp";
2314 #endif
2316 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2317 CL_TARGET);
2318 gcc_assert (ok);
2321 arm_fpu_desc = &all_fpus[arm_fpu_index];
2323 switch (arm_fpu_desc->model)
2325 case ARM_FP_MODEL_VFP:
2326 arm_fpu_attr = FPU_VFP;
2327 break;
2329 default:
2330 gcc_unreachable();
2333 if (TARGET_AAPCS_BASED)
2335 if (TARGET_CALLER_INTERWORKING)
2336 error ("AAPCS does not support -mcaller-super-interworking");
2337 else
2338 if (TARGET_CALLEE_INTERWORKING)
2339 error ("AAPCS does not support -mcallee-super-interworking");
2342 /* iWMMXt and NEON are incompatible. */
2343 if (TARGET_IWMMXT && TARGET_NEON)
2344 error ("iWMMXt and NEON are incompatible");
2346 /* iWMMXt unsupported under Thumb mode. */
2347 if (TARGET_THUMB && TARGET_IWMMXT)
2348 error ("iWMMXt unsupported under Thumb mode");
2350 /* __fp16 support currently assumes the core has ldrh. */
2351 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2352 sorry ("__fp16 and no ldrh");
2354 /* If soft-float is specified then don't use FPU. */
2355 if (TARGET_SOFT_FLOAT)
2356 arm_fpu_attr = FPU_NONE;
2358 if (TARGET_AAPCS_BASED)
2360 if (arm_abi == ARM_ABI_IWMMXT)
2361 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2362 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2363 && TARGET_HARD_FLOAT
2364 && TARGET_VFP)
2365 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2366 else
2367 arm_pcs_default = ARM_PCS_AAPCS;
2369 else
2371 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2372 sorry ("-mfloat-abi=hard and VFP");
2374 if (arm_abi == ARM_ABI_APCS)
2375 arm_pcs_default = ARM_PCS_APCS;
2376 else
2377 arm_pcs_default = ARM_PCS_ATPCS;
2380 /* For arm2/3 there is no need to do any scheduling if we are doing
2381 software floating-point. */
2382 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2383 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2385 /* Use the cp15 method if it is available. */
2386 if (target_thread_pointer == TP_AUTO)
2388 if (arm_arch6k && !TARGET_THUMB1)
2389 target_thread_pointer = TP_CP15;
2390 else
2391 target_thread_pointer = TP_SOFT;
2394 if (TARGET_HARD_TP && TARGET_THUMB1)
2395 error ("can not use -mtp=cp15 with 16-bit Thumb");
2397 /* Override the default structure alignment for AAPCS ABI. */
2398 if (!global_options_set.x_arm_structure_size_boundary)
2400 if (TARGET_AAPCS_BASED)
2401 arm_structure_size_boundary = 8;
2403 else
2405 if (arm_structure_size_boundary != 8
2406 && arm_structure_size_boundary != 32
2407 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2409 if (ARM_DOUBLEWORD_ALIGN)
2410 warning (0,
2411 "structure size boundary can only be set to 8, 32 or 64");
2412 else
2413 warning (0, "structure size boundary can only be set to 8 or 32");
2414 arm_structure_size_boundary
2415 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2419 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2421 error ("RTP PIC is incompatible with Thumb");
2422 flag_pic = 0;
2425 /* If stack checking is disabled, we can use r10 as the PIC register,
2426 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2427 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2429 if (TARGET_VXWORKS_RTP)
2430 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2431 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2434 if (flag_pic && TARGET_VXWORKS_RTP)
2435 arm_pic_register = 9;
2437 if (arm_pic_register_string != NULL)
2439 int pic_register = decode_reg_name (arm_pic_register_string);
2441 if (!flag_pic)
2442 warning (0, "-mpic-register= is useless without -fpic");
2444 /* Prevent the user from choosing an obviously stupid PIC register. */
2445 else if (pic_register < 0 || call_used_regs[pic_register]
2446 || pic_register == HARD_FRAME_POINTER_REGNUM
2447 || pic_register == STACK_POINTER_REGNUM
2448 || pic_register >= PC_REGNUM
2449 || (TARGET_VXWORKS_RTP
2450 && (unsigned int) pic_register != arm_pic_register))
2451 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2452 else
2453 arm_pic_register = pic_register;
2456 if (TARGET_VXWORKS_RTP
2457 && !global_options_set.x_arm_pic_data_is_text_relative)
2458 arm_pic_data_is_text_relative = 0;
2460 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2461 if (fix_cm3_ldrd == 2)
2463 if (arm_selected_cpu->core == cortexm3)
2464 fix_cm3_ldrd = 1;
2465 else
2466 fix_cm3_ldrd = 0;
2469 /* Enable -munaligned-access by default for
2470 - all ARMv6 architecture-based processors
2471 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2472 - ARMv8 architecture-based processors.
2474 Disable -munaligned-access by default for
2475 - all pre-ARMv6 architecture-based processors
2476 - ARMv6-M architecture-based processors. */
2478 if (unaligned_access == 2)
2480 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2481 unaligned_access = 1;
2482 else
2483 unaligned_access = 0;
2485 else if (unaligned_access == 1
2486 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2488 warning (0, "target CPU does not support unaligned accesses");
2489 unaligned_access = 0;
2492 if (TARGET_THUMB1 && flag_schedule_insns)
2494 /* Don't warn since it's on by default in -O2. */
2495 flag_schedule_insns = 0;
2498 if (optimize_size)
2500 /* If optimizing for size, bump the number of instructions that we
2501 are prepared to conditionally execute (even on a StrongARM). */
2502 max_insns_skipped = 6;
2504 else
2505 max_insns_skipped = current_tune->max_insns_skipped;
2507 /* Hot/Cold partitioning is not currently supported, since we can't
2508 handle literal pool placement in that case. */
2509 if (flag_reorder_blocks_and_partition)
2511 inform (input_location,
2512 "-freorder-blocks-and-partition not supported on this architecture");
2513 flag_reorder_blocks_and_partition = 0;
2514 flag_reorder_blocks = 1;
2517 if (flag_pic)
2518 /* Hoisting PIC address calculations more aggressively provides a small,
2519 but measurable, size reduction for PIC code. Therefore, we decrease
2520 the bar for unrestricted expression hoisting to the cost of PIC address
2521 calculation, which is 2 instructions. */
2522 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2523 global_options.x_param_values,
2524 global_options_set.x_param_values);
2526 /* ARM EABI defaults to strict volatile bitfields. */
2527 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2528 && abi_version_at_least(2))
2529 flag_strict_volatile_bitfields = 1;
2531 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we have
2532 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2533 if (flag_prefetch_loop_arrays < 0
2534 && HAVE_prefetch
2535 && optimize >= 3
2536 && current_tune->num_prefetch_slots > 0)
2537 flag_prefetch_loop_arrays = 1;
2539 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2540 defaults unless we are tuning for a core we have researched values for. */
2541 if (current_tune->num_prefetch_slots > 0)
2542 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2543 current_tune->num_prefetch_slots,
2544 global_options.x_param_values,
2545 global_options_set.x_param_values);
2546 if (current_tune->l1_cache_line_size >= 0)
2547 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2548 current_tune->l1_cache_line_size,
2549 global_options.x_param_values,
2550 global_options_set.x_param_values);
2551 if (current_tune->l1_cache_size >= 0)
2552 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2553 current_tune->l1_cache_size,
2554 global_options.x_param_values,
2555 global_options_set.x_param_values);
2557 /* Use Neon to perform 64-bit operations rather than core
2558 registers. */
2559 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2560 if (use_neon_for_64bits == 1)
2561 prefer_neon_for_64bits = true;
2563 /* Use the alternative scheduling-pressure algorithm by default. */
2564 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2565 global_options.x_param_values,
2566 global_options_set.x_param_values);
2568 /* Disable shrink-wrap when optimizing function for size, since it tends to
2569 generate additional returns. */
2570 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2571 flag_shrink_wrap = false;
2572 /* TBD: Dwarf info for apcs frame is not handled yet. */
2573 if (TARGET_APCS_FRAME)
2574 flag_shrink_wrap = false;
2576 /* Register global variables with the garbage collector. */
2577 arm_add_gc_roots ();
2580 static void
2581 arm_add_gc_roots (void)
2583 gcc_obstack_init(&minipool_obstack);
2584 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2587 /* A table of known ARM exception types.
2588 For use with the interrupt function attribute. */
2590 typedef struct
2592 const char *const arg;
2593 const unsigned long return_value;
2595 isr_attribute_arg;
2597 static const isr_attribute_arg isr_attribute_args [] =
2599 { "IRQ", ARM_FT_ISR },
2600 { "irq", ARM_FT_ISR },
2601 { "FIQ", ARM_FT_FIQ },
2602 { "fiq", ARM_FT_FIQ },
2603 { "ABORT", ARM_FT_ISR },
2604 { "abort", ARM_FT_ISR },
2605 { "ABORT", ARM_FT_ISR },
2606 { "abort", ARM_FT_ISR },
2607 { "UNDEF", ARM_FT_EXCEPTION },
2608 { "undef", ARM_FT_EXCEPTION },
2609 { "SWI", ARM_FT_EXCEPTION },
2610 { "swi", ARM_FT_EXCEPTION },
2611 { NULL, ARM_FT_NORMAL }
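/* Illustration: these strings are what users write in the interrupt
   attribute, e.g. "void irq_handler (void) __attribute__ ((interrupt ("IRQ")));",
   which arm_isr_value () below maps to ARM_FT_ISR.  */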
2614 /* Returns the (interrupt) function type of the current
2615 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2617 static unsigned long
2618 arm_isr_value (tree argument)
2620 const isr_attribute_arg * ptr;
2621 const char * arg;
2623 if (!arm_arch_notm)
2624 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2626 /* No argument - default to IRQ. */
2627 if (argument == NULL_TREE)
2628 return ARM_FT_ISR;
2630 /* Get the value of the argument. */
2631 if (TREE_VALUE (argument) == NULL_TREE
2632 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2633 return ARM_FT_UNKNOWN;
2635 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2637 /* Check it against the list of known arguments. */
2638 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2639 if (streq (arg, ptr->arg))
2640 return ptr->return_value;
2642 /* An unrecognized interrupt type. */
2643 return ARM_FT_UNKNOWN;
2646 /* Computes the type of the current function. */
2648 static unsigned long
2649 arm_compute_func_type (void)
2651 unsigned long type = ARM_FT_UNKNOWN;
2652 tree a;
2653 tree attr;
2655 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2657 /* Decide if the current function is volatile. Such functions
2658 never return, and many memory cycles can be saved by not storing
2659 register values that will never be needed again. This optimization
2660 was added to speed up context switching in a kernel application. */
2661 if (optimize > 0
2662 && (TREE_NOTHROW (current_function_decl)
2663 || !(flag_unwind_tables
2664 || (flag_exceptions
2665 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2666 && TREE_THIS_VOLATILE (current_function_decl))
2667 type |= ARM_FT_VOLATILE;
2669 if (cfun->static_chain_decl != NULL)
2670 type |= ARM_FT_NESTED;
2672 attr = DECL_ATTRIBUTES (current_function_decl);
2674 a = lookup_attribute ("naked", attr);
2675 if (a != NULL_TREE)
2676 type |= ARM_FT_NAKED;
2678 a = lookup_attribute ("isr", attr);
2679 if (a == NULL_TREE)
2680 a = lookup_attribute ("interrupt", attr);
2682 if (a == NULL_TREE)
2683 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2684 else
2685 type |= arm_isr_value (TREE_VALUE (a));
2687 return type;
2690 /* Returns the type of the current function. */
2692 unsigned long
2693 arm_current_func_type (void)
2695 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2696 cfun->machine->func_type = arm_compute_func_type ();
2698 return cfun->machine->func_type;
2701 bool
2702 arm_allocate_stack_slots_for_args (void)
2704 /* Naked functions should not allocate stack slots for arguments. */
2705 return !IS_NAKED (arm_current_func_type ());
2708 static bool
2709 arm_warn_func_return (tree decl)
2711 /* Naked functions are implemented entirely in assembly, including the
2712 return sequence, so suppress warnings about this. */
2713 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2717 /* Output assembler code for a block containing the constant parts
2718 of a trampoline, leaving space for the variable parts.
2720 On the ARM, (if r8 is the static chain regnum, and remembering that
2721 referencing pc adds an offset of 8) the trampoline looks like:
2722 ldr r8, [pc, #0]
2723 ldr pc, [pc]
2724 .word static chain value
2725 .word function's address
2726 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2728 static void
2729 arm_asm_trampoline_template (FILE *f)
2731 if (TARGET_ARM)
2733 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2734 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2736 else if (TARGET_THUMB2)
2738 /* The Thumb-2 trampoline is similar to the arm implementation.
2739 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2740 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2741 STATIC_CHAIN_REGNUM, PC_REGNUM);
2742 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2744 else
2746 ASM_OUTPUT_ALIGN (f, 2);
2747 fprintf (f, "\t.code\t16\n");
2748 fprintf (f, ".Ltrampoline_start:\n");
2749 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2750 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2751 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2752 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2753 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2754 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2756 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2757 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2760 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2762 static void
2763 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2765 rtx fnaddr, mem, a_tramp;
2767 emit_block_move (m_tramp, assemble_trampoline_template (),
2768 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2770 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2771 emit_move_insn (mem, chain_value);
2773 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2774 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2775 emit_move_insn (mem, fnaddr);
2777 a_tramp = XEXP (m_tramp, 0);
2778 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2779 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2780 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2783 /* Thumb trampolines should be entered in thumb mode, so set
2784 the bottom bit of the address. */
2786 static rtx
2787 arm_trampoline_adjust_address (rtx addr)
2789 if (TARGET_THUMB)
2790 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2791 NULL, 0, OPTAB_LIB_WIDEN);
2792 return addr;
2795 /* Return 1 if it is possible to return using a single instruction.
2796 If SIBLING is non-null, this is a test for a return before a sibling
2797 call. SIBLING is the call insn, so we can examine its register usage. */
2800 use_return_insn (int iscond, rtx sibling)
2802 int regno;
2803 unsigned int func_type;
2804 unsigned long saved_int_regs;
2805 unsigned HOST_WIDE_INT stack_adjust;
2806 arm_stack_offsets *offsets;
2808 /* Never use a return instruction before reload has run. */
2809 if (!reload_completed)
2810 return 0;
2812 func_type = arm_current_func_type ();
2814 /* Naked, volatile and stack alignment functions need special
2815 consideration. */
2816 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2817 return 0;
2819 /* So do interrupt functions that use the frame pointer and Thumb
2820 interrupt functions. */
2821 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2822 return 0;
2824 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2825 && !optimize_function_for_size_p (cfun))
2826 return 0;
2828 offsets = arm_get_frame_offsets ();
2829 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2831 /* As do variadic functions. */
2832 if (crtl->args.pretend_args_size
2833 || cfun->machine->uses_anonymous_args
2834 /* Or if the function calls __builtin_eh_return () */
2835 || crtl->calls_eh_return
2836 /* Or if the function calls alloca */
2837 || cfun->calls_alloca
2838 /* Or if there is a stack adjustment. However, if the stack pointer
2839 is saved on the stack, we can use a pre-incrementing stack load. */
2840 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2841 && stack_adjust == 4)))
2842 return 0;
2844 saved_int_regs = offsets->saved_regs_mask;
2846 /* Unfortunately, the insn
2848 ldmib sp, {..., sp, ...}
2850 triggers a bug on most SA-110 based devices, such that the stack
2851 pointer won't be correctly restored if the instruction takes a
2852 page fault. We work around this problem by popping r3 along with
2853 the other registers, since that is never slower than executing
2854 another instruction.
2856 We test for !arm_arch5 here, because code for any architecture
2857 less than this could potentially be run on one of the buggy
2858 chips. */
2859 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2861 /* Validate that r3 is a call-clobbered register (always true in
2862 the default abi) ... */
2863 if (!call_used_regs[3])
2864 return 0;
2866 /* ... that it isn't being used for a return value ... */
2867 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2868 return 0;
2870 /* ... or for a tail-call argument ... */
2871 if (sibling)
2873 gcc_assert (CALL_P (sibling));
2875 if (find_regno_fusage (sibling, USE, 3))
2876 return 0;
2879 /* ... and that there are no call-saved registers in r0-r2
2880 (always true in the default ABI). */
2881 if (saved_int_regs & 0x7)
2882 return 0;
2885 /* Can't be done if interworking with Thumb, and any registers have been
2886 stacked. */
2887 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2888 return 0;
2890 /* On StrongARM, conditional returns are expensive if they aren't
2891 taken and multiple registers have been stacked. */
2892 if (iscond && arm_tune_strongarm)
2894 /* Conditional return when just the LR is stored is a simple
2895 conditional-load instruction, that's not expensive. */
2896 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2897 return 0;
2899 if (flag_pic
2900 && arm_pic_register != INVALID_REGNUM
2901 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2902 return 0;
2905 /* If there are saved registers but the LR isn't saved, then we need
2906 two instructions for the return. */
2907 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2908 return 0;
2910 /* Can't be done if any of the VFP regs are pushed,
2911 since this also requires an insn. */
2912 if (TARGET_HARD_FLOAT && TARGET_VFP)
2913 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2914 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2915 return 0;
2917 if (TARGET_REALLY_IWMMXT)
2918 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2919 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2920 return 0;
2922 return 1;
2925 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2926 shrink-wrapping if possible. This is the case if we need to emit a
2927 prologue, which we can test by looking at the offsets. */
2928 bool
2929 use_simple_return_p (void)
2931 arm_stack_offsets *offsets;
2933 offsets = arm_get_frame_offsets ();
2934 return offsets->outgoing_args != 0;
2937 /* Return TRUE if int I is a valid immediate ARM constant. */
2940 const_ok_for_arm (HOST_WIDE_INT i)
2942 int lowbit;
2944 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2945 be all zero, or all one. */
2946 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2947 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2948 != ((~(unsigned HOST_WIDE_INT) 0)
2949 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2950 return FALSE;
2952 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2954 /* Fast return for 0 and small values. We must do this for zero, since
2955 the code below can't handle that one case. */
2956 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2957 return TRUE;
2959 /* Get the number of trailing zeros. */
2960 lowbit = ffs((int) i) - 1;
2962 /* Only even shifts are allowed in ARM mode so round down to the
2963 nearest even number. */
2964 if (TARGET_ARM)
2965 lowbit &= ~1;
2967 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2968 return TRUE;
2970 if (TARGET_ARM)
2972 /* Allow rotated constants in ARM mode. */
2973 if (lowbit <= 4
2974 && ((i & ~0xc000003f) == 0
2975 || (i & ~0xf000000f) == 0
2976 || (i & ~0xfc000003) == 0))
2977 return TRUE;
2979 else
2981 HOST_WIDE_INT v;
2983 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2984 v = i & 0xff;
2985 v |= v << 16;
2986 if (i == v || i == (v | (v << 8)))
2987 return TRUE;
2989 /* Allow repeated pattern 0xXY00XY00. */
2990 v = i & 0xff00;
2991 v |= v << 16;
2992 if (i == v)
2993 return TRUE;
2996 return FALSE;
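/* Worked examples for the rules above: 0x00AB0000 is accepted in both ARM
   and Thumb-2 mode (0xAB rotated into bits 16-23, an even rotation),
   whereas 0x0001FE00 (0xFF << 9) is rejected in ARM mode because it needs
   an odd rotation, yet accepted in Thumb-2 mode where the shift position
   is not rounded down to an even number.  */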
2999 /* Return true if I is a valid constant for the operation CODE. */
3001 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3003 if (const_ok_for_arm (i))
3004 return 1;
3006 switch (code)
3008 case SET:
3009 /* See if we can use movw. */
3010 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3011 return 1;
3012 else
3013 /* Otherwise, try mvn. */
3014 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3016 case PLUS:
3017 /* See if we can use addw or subw. */
3018 if (TARGET_THUMB2
3019 && ((i & 0xfffff000) == 0
3020 || ((-i) & 0xfffff000) == 0))
3021 return 1;
3022 /* else fall through. */
3024 case COMPARE:
3025 case EQ:
3026 case NE:
3027 case GT:
3028 case LE:
3029 case LT:
3030 case GE:
3031 case GEU:
3032 case LTU:
3033 case GTU:
3034 case LEU:
3035 case UNORDERED:
3036 case ORDERED:
3037 case UNEQ:
3038 case UNGE:
3039 case UNLT:
3040 case UNGT:
3041 case UNLE:
3042 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3044 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3045 case XOR:
3046 return 0;
3048 case IOR:
3049 if (TARGET_THUMB2)
3050 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3051 return 0;
3053 case AND:
3054 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3056 default:
3057 gcc_unreachable ();
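/* Illustration of the SET and PLUS cases above: on Thumb-2, loading
   0x00012345 fails the movw test (bits above 15 are set), while adding
   0x00000FFF fits the addw test and adding -0xFFF fits subw; for AND, a
   mask such as 0xFFFFFF00 succeeds because its complement 0xFF is a valid
   ARM immediate.  */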
3061 /* Return true if I is a valid di mode constant for the operation CODE. */
3063 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3065 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3066 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3067 rtx hi = GEN_INT (hi_val);
3068 rtx lo = GEN_INT (lo_val);
3070 if (TARGET_THUMB1)
3071 return 0;
3073 switch (code)
3075 case AND:
3076 case IOR:
3077 case XOR:
3078 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3079 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3080 case PLUS:
3081 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3083 default:
3084 return 0;
3088 /* Emit a sequence of insns to handle a large constant.
3089 CODE is the code of the operation required, it can be any of SET, PLUS,
3090 IOR, AND, XOR, MINUS;
3091 MODE is the mode in which the operation is being performed;
3092 VAL is the integer to operate on;
3093 SOURCE is the other operand (a register, or a null-pointer for SET);
3094 SUBTARGETS means it is safe to create scratch registers if that will
3095 either produce a simpler sequence, or we will want to cse the values.
3096 Return value is the number of insns emitted. */
3098 /* ??? Tweak this for thumb2. */
3100 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3101 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3103 rtx cond;
3105 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3106 cond = COND_EXEC_TEST (PATTERN (insn));
3107 else
3108 cond = NULL_RTX;
3110 if (subtargets || code == SET
3111 || (REG_P (target) && REG_P (source)
3112 && REGNO (target) != REGNO (source)))
3114 /* After arm_reorg has been called, we can't fix up expensive
3115 constants by pushing them into memory so we must synthesize
3116 them in-line, regardless of the cost. This is only likely to
3117 be more costly on chips that have load delay slots and we are
3118 compiling without running the scheduler (so no splitting
3119 occurred before the final instruction emission).
3121 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3123 if (!after_arm_reorg
3124 && !cond
3125 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3126 1, 0)
3127 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3128 + (code != SET))))
3130 if (code == SET)
3132 /* Currently SET is the only monadic value for CODE, all
3133 the rest are dyadic. */
3134 if (TARGET_USE_MOVT)
3135 arm_emit_movpair (target, GEN_INT (val));
3136 else
3137 emit_set_insn (target, GEN_INT (val));
3139 return 1;
3141 else
3143 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3145 if (TARGET_USE_MOVT)
3146 arm_emit_movpair (temp, GEN_INT (val));
3147 else
3148 emit_set_insn (temp, GEN_INT (val));
3150 /* For MINUS, the constant is what we subtract SOURCE from (val - source),
3151 since (source - val) is always rewritten as (source + (-val)). */
3152 if (code == MINUS)
3153 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3154 else
3155 emit_set_insn (target,
3156 gen_rtx_fmt_ee (code, mode, source, temp));
3157 return 2;
3162 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3166 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3167 ARM/Thumb-2 immediates and add up to VAL.
3168 The function's return value gives the number of insns required. */
3169 static int
3170 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3171 struct four_ints *return_sequence)
3173 int best_consecutive_zeros = 0;
3174 int i;
3175 int best_start = 0;
3176 int insns1, insns2;
3177 struct four_ints tmp_sequence;
3179 /* If we aren't targeting ARM, the best place to start is always at
3180 the bottom, otherwise look more closely. */
3181 if (TARGET_ARM)
3183 for (i = 0; i < 32; i += 2)
3185 int consecutive_zeros = 0;
3187 if (!(val & (3 << i)))
3189 while ((i < 32) && !(val & (3 << i)))
3191 consecutive_zeros += 2;
3192 i += 2;
3194 if (consecutive_zeros > best_consecutive_zeros)
3196 best_consecutive_zeros = consecutive_zeros;
3197 best_start = i - consecutive_zeros;
3199 i -= 2;
3204 /* So long as it won't require any more insns to do so, it's
3205 desirable to emit a small constant (in bits 0...9) in the last
3206 insn. This way there is more chance that it can be combined with
3207 a later addressing insn to form a pre-indexed load or store
3208 operation. Consider:
3210 *((volatile int *)0xe0000100) = 1;
3211 *((volatile int *)0xe0000110) = 2;
3213 We want this to wind up as:
3215 mov rA, #0xe0000000
3216 mov rB, #1
3217 str rB, [rA, #0x100]
3218 mov rB, #2
3219 str rB, [rA, #0x110]
3221 rather than having to synthesize both large constants from scratch.
3223 Therefore, we calculate how many insns would be required to emit
3224 the constant starting from `best_start', and also starting from
3225 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3226 yield a shorter sequence, we may as well use zero. */
3227 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3228 if (best_start != 0
3229 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3231 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3232 if (insns2 <= insns1)
3234 *return_sequence = tmp_sequence;
3235 insns1 = insns2;
3239 return insns1;
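/* Worked example: for the constant 0xe0000100 from the comment above, no
   single 8-bit rotated immediate covers both set-bit groups, so the routine
   returns 2 with a sequence equivalent to 0xe0000000 plus 0x00000100 (for
   instance "mov rA, #0xe0000000" followed by "add rA, rA, #0x100" when the
   operation is a SET).  */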
3242 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3243 static int
3244 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3245 struct four_ints *return_sequence, int i)
3247 int remainder = val & 0xffffffff;
3248 int insns = 0;
3250 /* Try and find a way of doing the job in either two or three
3251 instructions.
3253 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3254 location. We start at position I. This may be the MSB, or
3255 optimal_immediate_sequence may have positioned it at the largest block
3256 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3257 wrapping around to the top of the word when we drop off the bottom.
3258 In the worst case this code should produce no more than four insns.
3260 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3261 constants, shifted to any arbitrary location. We should always start
3262 at the MSB. */
3265 int end;
3266 unsigned int b1, b2, b3, b4;
3267 unsigned HOST_WIDE_INT result;
3268 int loc;
3270 gcc_assert (insns < 4);
3272 if (i <= 0)
3273 i += 32;
3275 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3276 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3278 loc = i;
3279 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3280 /* We can use addw/subw for the last 12 bits. */
3281 result = remainder;
3282 else
3284 /* Use an 8-bit shifted/rotated immediate. */
3285 end = i - 8;
3286 if (end < 0)
3287 end += 32;
3288 result = remainder & ((0x0ff << end)
3289 | ((i < end) ? (0xff >> (32 - end))
3290 : 0));
3291 i -= 8;
3294 else
3296 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3297 arbitrary shifts. */
3298 i -= TARGET_ARM ? 2 : 1;
3299 continue;
3302 /* Next, see if we can do a better job with a thumb2 replicated
3303 constant.
3305 We do it this way around to catch the cases like 0x01F001E0 where
3306 two 8-bit immediates would work, but a replicated constant would
3307 make it worse.
3309 TODO: 16-bit constants that don't clear all the bits, but still win.
3310 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3311 if (TARGET_THUMB2)
3313 b1 = (remainder & 0xff000000) >> 24;
3314 b2 = (remainder & 0x00ff0000) >> 16;
3315 b3 = (remainder & 0x0000ff00) >> 8;
3316 b4 = remainder & 0xff;
3318 if (loc > 24)
3320 /* The 8-bit immediate already found clears b1 (and maybe b2),
3321 but must leave b3 and b4 alone. */
3323 /* First try to find a 32-bit replicated constant that clears
3324 almost everything. We can assume that we can't do it in one,
3325 or else we wouldn't be here. */
3326 unsigned int tmp = b1 & b2 & b3 & b4;
3327 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3328 + (tmp << 24);
3329 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3330 + (tmp == b3) + (tmp == b4);
3331 if (tmp
3332 && (matching_bytes >= 3
3333 || (matching_bytes == 2
3334 && const_ok_for_op (remainder & ~tmp2, code))))
3336 /* At least 3 of the bytes match, and the fourth has at
3337 least as many bits set, or two of the bytes match
3338 and it will only require one more insn to finish. */
3339 result = tmp2;
3340 i = tmp != b1 ? 32
3341 : tmp != b2 ? 24
3342 : tmp != b3 ? 16
3343 : 8;
3346 /* Second, try to find a 16-bit replicated constant that can
3347 leave three of the bytes clear. If b2 or b4 is already
3348 zero, then we can. If the 8-bit from above would not
3349 clear b2 anyway, then we still win. */
3350 else if (b1 == b3 && (!b2 || !b4
3351 || (remainder & 0x00ff0000 & ~result)))
3353 result = remainder & 0xff00ff00;
3354 i = 24;
3357 else if (loc > 16)
3359 /* The 8-bit immediate already found clears b2 (and maybe b3)
3360 and we don't get here unless b1 is already clear, but it will
3361 leave b4 unchanged. */
3363 /* If we can clear b2 and b4 at once, then we win, since the
3364 8-bits couldn't possibly reach that far. */
3365 if (b2 == b4)
3367 result = remainder & 0x00ff00ff;
3368 i = 16;
3373 return_sequence->i[insns++] = result;
3374 remainder &= ~result;
3376 if (code == SET || code == MINUS)
3377 code = PLUS;
3379 while (remainder);
3381 return insns;
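/* Worked example (illustrative, not from the original source): for
   remainder = 0x01f001e0 with code == IOR, the loop above records two
   8-bit rotated immediates, 0x01f00000 and 0x000001e0, in return_sequence,
   a two-insn split; as the comment above notes, a Thumb-2 replicated
   constant would only make this case worse, so the replicated-constant
   check is a refinement of the plain 8-bit split rather than a
   replacement for it.  */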
3384 /* Emit an instruction with the indicated PATTERN. If COND is
3385 non-NULL, conditionalize the execution of the instruction on COND
3386 being true. */
3388 static void
3389 emit_constant_insn (rtx cond, rtx pattern)
3391 if (cond)
3392 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3393 emit_insn (pattern);
3396 /* As above, but extra parameter GENERATE which, if clear, suppresses
3397 RTL generation. */
3399 static int
3400 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3401 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3402 int generate)
3404 int can_invert = 0;
3405 int can_negate = 0;
3406 int final_invert = 0;
3407 int i;
3408 int set_sign_bit_copies = 0;
3409 int clear_sign_bit_copies = 0;
3410 int clear_zero_bit_copies = 0;
3411 int set_zero_bit_copies = 0;
3412 int insns = 0, neg_insns, inv_insns;
3413 unsigned HOST_WIDE_INT temp1, temp2;
3414 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3415 struct four_ints *immediates;
3416 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3418 /* Find out which operations are safe for a given CODE. Also do a quick
3419 check for degenerate cases; these can occur when DImode operations
3420 are split. */
3421 switch (code)
3423 case SET:
3424 can_invert = 1;
3425 break;
3427 case PLUS:
3428 can_negate = 1;
3429 break;
3431 case IOR:
3432 if (remainder == 0xffffffff)
3434 if (generate)
3435 emit_constant_insn (cond,
3436 gen_rtx_SET (VOIDmode, target,
3437 GEN_INT (ARM_SIGN_EXTEND (val))));
3438 return 1;
3441 if (remainder == 0)
3443 if (reload_completed && rtx_equal_p (target, source))
3444 return 0;
3446 if (generate)
3447 emit_constant_insn (cond,
3448 gen_rtx_SET (VOIDmode, target, source));
3449 return 1;
3451 break;
3453 case AND:
3454 if (remainder == 0)
3456 if (generate)
3457 emit_constant_insn (cond,
3458 gen_rtx_SET (VOIDmode, target, const0_rtx));
3459 return 1;
3461 if (remainder == 0xffffffff)
3463 if (reload_completed && rtx_equal_p (target, source))
3464 return 0;
3465 if (generate)
3466 emit_constant_insn (cond,
3467 gen_rtx_SET (VOIDmode, target, source));
3468 return 1;
3470 can_invert = 1;
3471 break;
3473 case XOR:
3474 if (remainder == 0)
3476 if (reload_completed && rtx_equal_p (target, source))
3477 return 0;
3478 if (generate)
3479 emit_constant_insn (cond,
3480 gen_rtx_SET (VOIDmode, target, source));
3481 return 1;
3484 if (remainder == 0xffffffff)
3486 if (generate)
3487 emit_constant_insn (cond,
3488 gen_rtx_SET (VOIDmode, target,
3489 gen_rtx_NOT (mode, source)));
3490 return 1;
3492 final_invert = 1;
3493 break;
3495 case MINUS:
3496 /* We treat MINUS as (val - source), since (source - val) is always
3497 passed as (source + (-val)). */
3498 if (remainder == 0)
3500 if (generate)
3501 emit_constant_insn (cond,
3502 gen_rtx_SET (VOIDmode, target,
3503 gen_rtx_NEG (mode, source)));
3504 return 1;
3506 if (const_ok_for_arm (val))
3508 if (generate)
3509 emit_constant_insn (cond,
3510 gen_rtx_SET (VOIDmode, target,
3511 gen_rtx_MINUS (mode, GEN_INT (val),
3512 source)));
3513 return 1;
3516 break;
3518 default:
3519 gcc_unreachable ();
3522 /* If we can do it in one insn get out quickly. */
3523 if (const_ok_for_op (val, code))
3525 if (generate)
3526 emit_constant_insn (cond,
3527 gen_rtx_SET (VOIDmode, target,
3528 (source
3529 ? gen_rtx_fmt_ee (code, mode, source,
3530 GEN_INT (val))
3531 : GEN_INT (val))));
3532 return 1;
3535 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3536 insn. */
3537 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3538 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3540 if (generate)
3542 if (mode == SImode && i == 16)
3543 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3544 smaller insn. */
3545 emit_constant_insn (cond,
3546 gen_zero_extendhisi2
3547 (target, gen_lowpart (HImode, source)));
3548 else
3549 /* Extz only supports SImode, but we can coerce the operands
3550 into that mode. */
3551 emit_constant_insn (cond,
3552 gen_extzv_t2 (gen_lowpart (SImode, target),
3553 gen_lowpart (SImode, source),
3554 GEN_INT (i), const0_rtx));
3557 return 1;
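/* Illustrative examples (assumption, not from the original source): with
   the test above, "x & 0xffff" becomes a single uxth, and on Thumb-2
   "x & 0x1ff" becomes a single ubfx of width 9 starting at bit 0.  */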
3560 /* Calculate a few attributes that may be useful for specific
3561 optimizations. */
3562 /* Count number of leading zeros. */
3563 for (i = 31; i >= 0; i--)
3565 if ((remainder & (1 << i)) == 0)
3566 clear_sign_bit_copies++;
3567 else
3568 break;
3571 /* Count number of leading 1's. */
3572 for (i = 31; i >= 0; i--)
3574 if ((remainder & (1 << i)) != 0)
3575 set_sign_bit_copies++;
3576 else
3577 break;
3580 /* Count number of trailing zero's. */
3581 for (i = 0; i <= 31; i++)
3583 if ((remainder & (1 << i)) == 0)
3584 clear_zero_bit_copies++;
3585 else
3586 break;
3589 /* Count number of trailing 1's. */
3590 for (i = 0; i <= 31; i++)
3592 if ((remainder & (1 << i)) != 0)
3593 set_zero_bit_copies++;
3594 else
3595 break;
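/* Example of the counts (illustrative): for remainder = 0x0003ffc0 the
   loops above give clear_sign_bit_copies = 14, set_sign_bit_copies = 0,
   clear_zero_bit_copies = 6 and set_zero_bit_copies = 0.  */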
3598 switch (code)
3600 case SET:
3601 /* See if we can do this by sign_extending a constant that is known
3602 to be negative. This is a good way of doing it, since the shift
3603 may well merge into a subsequent insn. */
3604 if (set_sign_bit_copies > 1)
3606 if (const_ok_for_arm
3607 (temp1 = ARM_SIGN_EXTEND (remainder
3608 << (set_sign_bit_copies - 1))))
3610 if (generate)
3612 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3613 emit_constant_insn (cond,
3614 gen_rtx_SET (VOIDmode, new_src,
3615 GEN_INT (temp1)));
3616 emit_constant_insn (cond,
3617 gen_ashrsi3 (target, new_src,
3618 GEN_INT (set_sign_bit_copies - 1)));
3620 return 2;
3622 /* For an inverted constant, we will need to set the low bits,
3623 these will be shifted out of harm's way. */
3624 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3625 if (const_ok_for_arm (~temp1))
3627 if (generate)
3629 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3630 emit_constant_insn (cond,
3631 gen_rtx_SET (VOIDmode, new_src,
3632 GEN_INT (temp1)));
3633 emit_constant_insn (cond,
3634 gen_ashrsi3 (target, new_src,
3635 GEN_INT (set_sign_bit_copies - 1)));
3637 return 2;
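/* Illustrative example (assumption): for a SET of 0xffffff81,
   set_sign_bit_copies is 25, so the code above can load 0x81000000
   (a valid rotated immediate) and arithmetic-shift it right by 24 to
   recreate the constant in two insns.  */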
3641 /* See if we can calculate the value as the difference between two
3642 valid immediates. */
3643 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3645 int topshift = clear_sign_bit_copies & ~1;
3647 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3648 & (0xff000000 >> topshift));
3650 /* If temp1 is zero, then that means the 9 most significant
3651 bits of remainder were 1 and we've caused it to overflow.
3652 When topshift is 0 we don't need to do anything since we
3653 can borrow from 'bit 32'. */
3654 if (temp1 == 0 && topshift != 0)
3655 temp1 = 0x80000000 >> (topshift - 1);
3657 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3659 if (const_ok_for_arm (temp2))
3661 if (generate)
3663 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3664 emit_constant_insn (cond,
3665 gen_rtx_SET (VOIDmode, new_src,
3666 GEN_INT (temp1)));
3667 emit_constant_insn (cond,
3668 gen_addsi3 (target, new_src,
3669 GEN_INT (-temp2)));
3672 return 2;
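/* Illustrative example (assumption): 0x0ffffff0 has four clear bits at
   each end, so the code above builds it as 0x10000000 - 0x10, i.e.
   "mov rd, #0x10000000; sub rd, rd, #16", two insns.  */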
3676 /* See if we can generate this by setting the bottom (or the top)
3677 16 bits, and then shifting these into the other half of the
3678 word. We only look for the simplest cases, to do more would cost
3679 too much. Be careful, however, not to generate this when the
3680 alternative would take fewer insns. */
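/* Illustrative example (assumption): for 0x01230123 the loop below finds
   i == 16, builds 0x123 in a subtarget (itself two insns) and then ORs it
   with a copy of itself shifted left by 16, three insns in total instead
   of four.  */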
3681 if (val & 0xffff0000)
3683 temp1 = remainder & 0xffff0000;
3684 temp2 = remainder & 0x0000ffff;
3686 /* Overlaps outside this range are best done using other methods. */
3687 for (i = 9; i < 24; i++)
3689 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3690 && !const_ok_for_arm (temp2))
3692 rtx new_src = (subtargets
3693 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3694 : target);
3695 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3696 source, subtargets, generate);
3697 source = new_src;
3698 if (generate)
3699 emit_constant_insn
3700 (cond,
3701 gen_rtx_SET
3702 (VOIDmode, target,
3703 gen_rtx_IOR (mode,
3704 gen_rtx_ASHIFT (mode, source,
3705 GEN_INT (i)),
3706 source)));
3707 return insns + 1;
3711 /* Don't duplicate cases already considered. */
3712 for (i = 17; i < 24; i++)
3714 if (((temp1 | (temp1 >> i)) == remainder)
3715 && !const_ok_for_arm (temp1))
3717 rtx new_src = (subtargets
3718 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3719 : target);
3720 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3721 source, subtargets, generate);
3722 source = new_src;
3723 if (generate)
3724 emit_constant_insn
3725 (cond,
3726 gen_rtx_SET (VOIDmode, target,
3727 gen_rtx_IOR
3728 (mode,
3729 gen_rtx_LSHIFTRT (mode, source,
3730 GEN_INT (i)),
3731 source)));
3732 return insns + 1;
3736 break;
3738 case IOR:
3739 case XOR:
3740 /* If we have IOR or XOR, and the constant can be loaded in a
3741 single instruction, and we can find a temporary to put it in,
3742 then this can be done in two instructions instead of 3-4. */
3743 if (subtargets
3744 /* TARGET can't be NULL if SUBTARGETS is 0 */
3745 || (reload_completed && !reg_mentioned_p (target, source)))
3747 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3749 if (generate)
3751 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3753 emit_constant_insn (cond,
3754 gen_rtx_SET (VOIDmode, sub,
3755 GEN_INT (val)));
3756 emit_constant_insn (cond,
3757 gen_rtx_SET (VOIDmode, target,
3758 gen_rtx_fmt_ee (code, mode,
3759 source, sub)));
3761 return 2;
3765 if (code == XOR)
3766 break;
3768 /* Convert.
3769 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3770 with the remaining bits 0, e.g. 0xfff00000)
3771 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3773 This can be done in 2 instructions by using shifts with mov or mvn.
3774 e.g. for
3775 x = x | 0xfff00000;
3776 we generate.
3777 mvn r0, r0, asl #12
3778 mvn r0, r0, lsr #12 */
3779 if (set_sign_bit_copies > 8
3780 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3782 if (generate)
3784 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3785 rtx shift = GEN_INT (set_sign_bit_copies);
3787 emit_constant_insn
3788 (cond,
3789 gen_rtx_SET (VOIDmode, sub,
3790 gen_rtx_NOT (mode,
3791 gen_rtx_ASHIFT (mode,
3792 source,
3793 shift))));
3794 emit_constant_insn
3795 (cond,
3796 gen_rtx_SET (VOIDmode, target,
3797 gen_rtx_NOT (mode,
3798 gen_rtx_LSHIFTRT (mode, sub,
3799 shift))));
3801 return 2;
3804 /* Convert
3805 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3807 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3809 For eg. r0 = r0 | 0xfff
3810 mvn r0, r0, lsr #12
3811 mvn r0, r0, asl #12
3814 if (set_zero_bit_copies > 8
3815 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3817 if (generate)
3819 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3820 rtx shift = GEN_INT (set_zero_bit_copies);
3822 emit_constant_insn
3823 (cond,
3824 gen_rtx_SET (VOIDmode, sub,
3825 gen_rtx_NOT (mode,
3826 gen_rtx_LSHIFTRT (mode,
3827 source,
3828 shift))));
3829 emit_constant_insn
3830 (cond,
3831 gen_rtx_SET (VOIDmode, target,
3832 gen_rtx_NOT (mode,
3833 gen_rtx_ASHIFT (mode, sub,
3834 shift))));
3836 return 2;
3839 /* This will never be reached for Thumb2 because orn is a valid
3840 instruction. This is for Thumb1 and the ARM 32 bit cases.
3842 x = y | constant (such that ~constant is a valid constant)
3843 Transform this to
3844 x = ~(~y & ~constant).
3846 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3848 if (generate)
3850 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3851 emit_constant_insn (cond,
3852 gen_rtx_SET (VOIDmode, sub,
3853 gen_rtx_NOT (mode, source)));
3854 source = sub;
3855 if (subtargets)
3856 sub = gen_reg_rtx (mode);
3857 emit_constant_insn (cond,
3858 gen_rtx_SET (VOIDmode, sub,
3859 gen_rtx_AND (mode, source,
3860 GEN_INT (temp1))));
3861 emit_constant_insn (cond,
3862 gen_rtx_SET (VOIDmode, target,
3863 gen_rtx_NOT (mode, sub)));
3865 return 3;
3867 break;
3869 case AND:
3870 /* See if two shifts will do 2 or more insn's worth of work. */
3871 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3873 HOST_WIDE_INT shift_mask = ((0xffffffff
3874 << (32 - clear_sign_bit_copies))
3875 & 0xffffffff);
3877 if ((remainder | shift_mask) != 0xffffffff)
3879 if (generate)
3881 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3882 insns = arm_gen_constant (AND, mode, cond,
3883 remainder | shift_mask,
3884 new_src, source, subtargets, 1);
3885 source = new_src;
3887 else
3889 rtx targ = subtargets ? NULL_RTX : target;
3890 insns = arm_gen_constant (AND, mode, cond,
3891 remainder | shift_mask,
3892 targ, source, subtargets, 0);
3896 if (generate)
3898 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3899 rtx shift = GEN_INT (clear_sign_bit_copies);
3901 emit_insn (gen_ashlsi3 (new_src, source, shift));
3902 emit_insn (gen_lshrsi3 (target, new_src, shift));
3905 return insns + 2;
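/* Illustrative example (assumption): on a target without the Thumb-2
   ubfx path, "x & 0x7fff" reaches the code above with
   clear_sign_bit_copies == 17 and is done as a shift left by 17 followed
   by a logical shift right by 17, two insns.  */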
3908 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3910 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3912 if ((remainder | shift_mask) != 0xffffffff)
3914 if (generate)
3916 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3918 insns = arm_gen_constant (AND, mode, cond,
3919 remainder | shift_mask,
3920 new_src, source, subtargets, 1);
3921 source = new_src;
3923 else
3925 rtx targ = subtargets ? NULL_RTX : target;
3927 insns = arm_gen_constant (AND, mode, cond,
3928 remainder | shift_mask,
3929 targ, source, subtargets, 0);
3933 if (generate)
3935 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3936 rtx shift = GEN_INT (clear_zero_bit_copies);
3938 emit_insn (gen_lshrsi3 (new_src, source, shift));
3939 emit_insn (gen_ashlsi3 (target, new_src, shift));
3942 return insns + 2;
3945 break;
3947 default:
3948 break;
3951 /* Calculate what the instruction sequences would be if we generated it
3952 normally, negated, or inverted. */
3953 if (code == AND)
3954 /* AND cannot be split into multiple insns, so invert and use BIC. */
3955 insns = 99;
3956 else
3957 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3959 if (can_negate)
3960 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3961 &neg_immediates);
3962 else
3963 neg_insns = 99;
3965 if (can_invert || final_invert)
3966 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3967 &inv_immediates);
3968 else
3969 inv_insns = 99;
3971 immediates = &pos_immediates;
3973 /* Is the negated immediate sequence more efficient? */
3974 if (neg_insns < insns && neg_insns <= inv_insns)
3976 insns = neg_insns;
3977 immediates = &neg_immediates;
3979 else
3980 can_negate = 0;
3982 /* Is the inverted immediate sequence more efficient?
3983 We must allow for an extra NOT instruction for XOR operations, although
3984 there is some chance that the final 'mvn' will get optimized later. */
3985 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3987 insns = inv_insns;
3988 immediates = &inv_immediates;
3990 else
3992 can_invert = 0;
3993 final_invert = 0;
3996 /* Now output the chosen sequence as instructions. */
3997 if (generate)
3999 for (i = 0; i < insns; i++)
4001 rtx new_src, temp1_rtx;
4003 temp1 = immediates->i[i];
4005 if (code == SET || code == MINUS)
4006 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4007 else if ((final_invert || i < (insns - 1)) && subtargets)
4008 new_src = gen_reg_rtx (mode);
4009 else
4010 new_src = target;
4012 if (can_invert)
4013 temp1 = ~temp1;
4014 else if (can_negate)
4015 temp1 = -temp1;
4017 temp1 = trunc_int_for_mode (temp1, mode);
4018 temp1_rtx = GEN_INT (temp1);
4020 if (code == SET)
4022 else if (code == MINUS)
4023 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4024 else
4025 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, new_src,
4029 temp1_rtx));
4030 source = new_src;
4032 if (code == SET)
4034 can_negate = can_invert;
4035 can_invert = 0;
4036 code = PLUS;
4038 else if (code == MINUS)
4039 code = PLUS;
4043 if (final_invert)
4045 if (generate)
4046 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4047 gen_rtx_NOT (mode, source)));
4048 insns++;
4051 return insns;
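/* Illustrative note (assumption): AND is never split directly; a mask
   such as 0xfffe0ffe is handled through the inverted immediates above,
   i.e. emitted as two BICs clearing 0x0001f000 and 0x00000001.  */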
4054 /* Canonicalize a comparison so that we are more likely to recognize it.
4055 This can be done for a few constant compares, where we can make the
4056 immediate value easier to load. */
4058 static void
4059 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4060 bool op0_preserve_value)
4062 enum machine_mode mode;
4063 unsigned HOST_WIDE_INT i, maxval;
4065 mode = GET_MODE (*op0);
4066 if (mode == VOIDmode)
4067 mode = GET_MODE (*op1);
4069 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4071 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4072 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4073 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4074 for GTU/LEU in Thumb mode. */
4075 if (mode == DImode)
4077 rtx tem;
4079 if (*code == GT || *code == LE
4080 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4082 /* Missing comparison. First try to use an available
4083 comparison. */
4084 if (CONST_INT_P (*op1))
4086 i = INTVAL (*op1);
4087 switch (*code)
4089 case GT:
4090 case LE:
4091 if (i != maxval
4092 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4094 *op1 = GEN_INT (i + 1);
4095 *code = *code == GT ? GE : LT;
4096 return;
4098 break;
4099 case GTU:
4100 case LEU:
4101 if (i != ~((unsigned HOST_WIDE_INT) 0)
4102 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4104 *op1 = GEN_INT (i + 1);
4105 *code = *code == GTU ? GEU : LTU;
4106 return;
4108 break;
4109 default:
4110 gcc_unreachable ();
4114 /* If that did not work, reverse the condition. */
4115 if (!op0_preserve_value)
4117 tem = *op0;
4118 *op0 = *op1;
4119 *op1 = tem;
4120 *code = (int)swap_condition ((enum rtx_code)*code);
4123 return;
4126 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4127 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4128 to facilitate possible combining with a cmp into 'ands'. */
4129 if (mode == SImode
4130 && GET_CODE (*op0) == ZERO_EXTEND
4131 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4132 && GET_MODE (XEXP (*op0, 0)) == QImode
4133 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4134 && subreg_lowpart_p (XEXP (*op0, 0))
4135 && *op1 == const0_rtx)
4136 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4137 GEN_INT (255));
4139 /* Comparisons smaller than DImode. Only adjust comparisons against
4140 an out-of-range constant. */
4141 if (!CONST_INT_P (*op1)
4142 || const_ok_for_arm (INTVAL (*op1))
4143 || const_ok_for_arm (- INTVAL (*op1)))
4144 return;
4146 i = INTVAL (*op1);
4148 switch (*code)
4150 case EQ:
4151 case NE:
4152 return;
4154 case GT:
4155 case LE:
4156 if (i != maxval
4157 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4159 *op1 = GEN_INT (i + 1);
4160 *code = *code == GT ? GE : LT;
4161 return;
4163 break;
4165 case GE:
4166 case LT:
4167 if (i != ~maxval
4168 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4170 *op1 = GEN_INT (i - 1);
4171 *code = *code == GE ? GT : LE;
4172 return;
4174 break;
4176 case GTU:
4177 case LEU:
4178 if (i != ~((unsigned HOST_WIDE_INT) 0)
4179 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4181 *op1 = GEN_INT (i + 1);
4182 *code = *code == GTU ? GEU : LTU;
4183 return;
4185 break;
4187 case GEU:
4188 case LTU:
4189 if (i != 0
4190 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4192 *op1 = GEN_INT (i - 1);
4193 *code = *code == GEU ? GTU : LEU;
4194 return;
4196 break;
4198 default:
4199 gcc_unreachable ();
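/* Illustrative example (assumption): "x > 1023" is canonicalized above to
   "x >= 1024", because 1023 is not a valid ARM immediate for cmp while
   1024 is.  */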
4204 /* Define how to find the value returned by a function. */
4206 static rtx
4207 arm_function_value(const_tree type, const_tree func,
4208 bool outgoing ATTRIBUTE_UNUSED)
4210 enum machine_mode mode;
4211 int unsignedp ATTRIBUTE_UNUSED;
4212 rtx r ATTRIBUTE_UNUSED;
4214 mode = TYPE_MODE (type);
4216 if (TARGET_AAPCS_BASED)
4217 return aapcs_allocate_return_reg (mode, type, func);
4219 /* Promote integer types. */
4220 if (INTEGRAL_TYPE_P (type))
4221 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4223 /* Promotes small structs returned in a register to full-word size
4224 for big-endian AAPCS. */
4225 if (arm_return_in_msb (type))
4227 HOST_WIDE_INT size = int_size_in_bytes (type);
4228 if (size % UNITS_PER_WORD != 0)
4230 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4231 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4235 return arm_libcall_value_1 (mode);
4238 /* libcall hashtable helpers. */
4240 struct libcall_hasher : typed_noop_remove <rtx_def>
4242 typedef rtx_def value_type;
4243 typedef rtx_def compare_type;
4244 static inline hashval_t hash (const value_type *);
4245 static inline bool equal (const value_type *, const compare_type *);
4246 static inline void remove (value_type *);
4249 inline bool
4250 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4252 return rtx_equal_p (p1, p2);
4255 inline hashval_t
4256 libcall_hasher::hash (const value_type *p1)
4258 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4261 typedef hash_table <libcall_hasher> libcall_table_type;
4263 static void
4264 add_libcall (libcall_table_type htab, rtx libcall)
4266 *htab.find_slot (libcall, INSERT) = libcall;
4269 static bool
4270 arm_libcall_uses_aapcs_base (const_rtx libcall)
4272 static bool init_done = false;
4273 static libcall_table_type libcall_htab;
4275 if (!init_done)
4277 init_done = true;
4279 libcall_htab.create (31);
4280 add_libcall (libcall_htab,
4281 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4282 add_libcall (libcall_htab,
4283 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4284 add_libcall (libcall_htab,
4285 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4286 add_libcall (libcall_htab,
4287 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4289 add_libcall (libcall_htab,
4290 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4291 add_libcall (libcall_htab,
4292 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4293 add_libcall (libcall_htab,
4294 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4295 add_libcall (libcall_htab,
4296 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4298 add_libcall (libcall_htab,
4299 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4300 add_libcall (libcall_htab,
4301 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4302 add_libcall (libcall_htab,
4303 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4304 add_libcall (libcall_htab,
4305 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4306 add_libcall (libcall_htab,
4307 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4308 add_libcall (libcall_htab,
4309 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4310 add_libcall (libcall_htab,
4311 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4312 add_libcall (libcall_htab,
4313 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4315 /* Values from double-precision helper functions are returned in core
4316 registers if the selected core only supports single-precision
4317 arithmetic, even if we are using the hard-float ABI. The same is
4318 true for single-precision helpers, but we will never be using the
4319 hard-float ABI on a CPU which doesn't support single-precision
4320 operations in hardware. */
4321 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4322 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4323 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4324 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4325 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4326 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4327 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4328 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4329 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4330 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4331 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4332 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4333 SFmode));
4334 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4335 DFmode));
4338 return libcall && libcall_htab.find (libcall) != NULL;
4341 static rtx
4342 arm_libcall_value_1 (enum machine_mode mode)
4344 if (TARGET_AAPCS_BASED)
4345 return aapcs_libcall_value (mode);
4346 else if (TARGET_IWMMXT_ABI
4347 && arm_vector_mode_supported_p (mode))
4348 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4349 else
4350 return gen_rtx_REG (mode, ARG_REGISTER (1));
4353 /* Define how to find the value returned by a library function
4354 assuming the value has mode MODE. */
4356 static rtx
4357 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4359 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4360 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4362 /* The following libcalls return their result in integer registers,
4363 even though they return a floating point value. */
4364 if (arm_libcall_uses_aapcs_base (libcall))
4365 return gen_rtx_REG (mode, ARG_REGISTER(1));
4369 return arm_libcall_value_1 (mode);
4372 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4374 static bool
4375 arm_function_value_regno_p (const unsigned int regno)
4377 if (regno == ARG_REGISTER (1)
4378 || (TARGET_32BIT
4379 && TARGET_AAPCS_BASED
4380 && TARGET_VFP
4381 && TARGET_HARD_FLOAT
4382 && regno == FIRST_VFP_REGNUM)
4383 || (TARGET_IWMMXT_ABI
4384 && regno == FIRST_IWMMXT_REGNUM))
4385 return true;
4387 return false;
4390 /* Determine the amount of memory needed to store the possible return
4391 registers of an untyped call. */
4393 arm_apply_result_size (void)
4395 int size = 16;
4397 if (TARGET_32BIT)
4399 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4400 size += 32;
4401 if (TARGET_IWMMXT_ABI)
4402 size += 8;
4405 return size;
4408 /* Decide whether TYPE should be returned in memory (true)
4409 or in a register (false). FNTYPE is the type of the function making
4410 the call. */
4411 static bool
4412 arm_return_in_memory (const_tree type, const_tree fntype)
4414 HOST_WIDE_INT size;
4416 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4418 if (TARGET_AAPCS_BASED)
4420 /* Simple, non-aggregate types (i.e. not including vectors and
4421 complex) are always returned in a register (or registers).
4422 We don't care about which register here, so we can short-cut
4423 some of the detail. */
4424 if (!AGGREGATE_TYPE_P (type)
4425 && TREE_CODE (type) != VECTOR_TYPE
4426 && TREE_CODE (type) != COMPLEX_TYPE)
4427 return false;
4429 /* Any return value that is no larger than one word can be
4430 returned in r0. */
4431 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4432 return false;
4434 /* Check any available co-processors to see if they accept the
4435 type as a register candidate (VFP, for example, can return
4436 some aggregates in consecutive registers). These aren't
4437 available if the call is variadic. */
4438 if (aapcs_select_return_coproc (type, fntype) >= 0)
4439 return false;
4441 /* Vector values should be returned using ARM registers, not
4442 memory (unless they're over 16 bytes, which will break since
4443 we only have four call-clobbered registers to play with). */
4444 if (TREE_CODE (type) == VECTOR_TYPE)
4445 return (size < 0 || size > (4 * UNITS_PER_WORD));
4447 /* The rest go in memory. */
4448 return true;
4451 if (TREE_CODE (type) == VECTOR_TYPE)
4452 return (size < 0 || size > (4 * UNITS_PER_WORD));
4454 if (!AGGREGATE_TYPE_P (type) &&
4455 (TREE_CODE (type) != VECTOR_TYPE))
4456 /* All simple types are returned in registers. */
4457 return false;
4459 if (arm_abi != ARM_ABI_APCS)
4461 /* ATPCS and later return aggregate types in memory only if they are
4462 larger than a word (or are variable size). */
4463 return (size < 0 || size > UNITS_PER_WORD);
4466 /* For the arm-wince targets we choose to be compatible with Microsoft's
4467 ARM and Thumb compilers, which always return aggregates in memory. */
4468 #ifndef ARM_WINCE
4469 /* All structures/unions bigger than one word are returned in memory.
4470 Also catch the case where int_size_in_bytes returns -1. In this case
4471 the aggregate is either huge or of variable size, and in either case
4472 we will want to return it via memory and not in a register. */
4473 if (size < 0 || size > UNITS_PER_WORD)
4474 return true;
4476 if (TREE_CODE (type) == RECORD_TYPE)
4478 tree field;
4480 /* For a struct the APCS says that we only return in a register
4481 if the type is 'integer like' and every addressable element
4482 has an offset of zero. For practical purposes this means
4483 that the structure can have at most one non bit-field element
4484 and that this element must be the first one in the structure. */
4486 /* Find the first field, ignoring non FIELD_DECL things which will
4487 have been created by C++. */
4488 for (field = TYPE_FIELDS (type);
4489 field && TREE_CODE (field) != FIELD_DECL;
4490 field = DECL_CHAIN (field))
4491 continue;
4493 if (field == NULL)
4494 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4496 /* Check that the first field is valid for returning in a register. */
4498 /* ... Floats are not allowed */
4499 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4500 return true;
4502 /* ... Aggregates that are not themselves valid for returning in
4503 a register are not allowed. */
4504 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4505 return true;
4507 /* Now check the remaining fields, if any. Only bitfields are allowed,
4508 since they are not addressable. */
4509 for (field = DECL_CHAIN (field);
4510 field;
4511 field = DECL_CHAIN (field))
4513 if (TREE_CODE (field) != FIELD_DECL)
4514 continue;
4516 if (!DECL_BIT_FIELD_TYPE (field))
4517 return true;
4520 return false;
4523 if (TREE_CODE (type) == UNION_TYPE)
4525 tree field;
4527 /* Unions can be returned in registers if every element is
4528 integral, or can be returned in an integer register. */
4529 for (field = TYPE_FIELDS (type);
4530 field;
4531 field = DECL_CHAIN (field))
4533 if (TREE_CODE (field) != FIELD_DECL)
4534 continue;
4536 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4537 return true;
4539 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4540 return true;
4543 return false;
4545 #endif /* not ARM_WINCE */
4547 /* Return all other types in memory. */
4548 return true;
4551 const struct pcs_attribute_arg
4553 const char *arg;
4554 enum arm_pcs value;
4555 } pcs_attribute_args[] =
4557 {"aapcs", ARM_PCS_AAPCS},
4558 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4559 #if 0
4560 /* We could recognize these, but changes would be needed elsewhere
4561 * to implement them. */
4562 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4563 {"atpcs", ARM_PCS_ATPCS},
4564 {"apcs", ARM_PCS_APCS},
4565 #endif
4566 {NULL, ARM_PCS_UNKNOWN}
4569 static enum arm_pcs
4570 arm_pcs_from_attribute (tree attr)
4572 const struct pcs_attribute_arg *ptr;
4573 const char *arg;
4575 /* Get the value of the argument. */
4576 if (TREE_VALUE (attr) == NULL_TREE
4577 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4578 return ARM_PCS_UNKNOWN;
4580 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4582 /* Check it against the list of known arguments. */
4583 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4584 if (streq (arg, ptr->arg))
4585 return ptr->value;
4587 /* An unrecognized PCS variant. */
4588 return ARM_PCS_UNKNOWN;
4591 /* Get the PCS variant to use for this call. TYPE is the function's type
4592 specification, DECL is the specific declaration. DECL may be null if
4593 the call could be indirect or if this is a library call. */
4594 static enum arm_pcs
4595 arm_get_pcs_model (const_tree type, const_tree decl)
4597 bool user_convention = false;
4598 enum arm_pcs user_pcs = arm_pcs_default;
4599 tree attr;
4601 gcc_assert (type);
4603 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4604 if (attr)
4606 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4607 user_convention = true;
4610 if (TARGET_AAPCS_BASED)
4612 /* Detect varargs functions. These always use the base rules
4613 (no argument is ever a candidate for a co-processor
4614 register). */
4615 bool base_rules = stdarg_p (type);
4617 if (user_convention)
4619 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4620 sorry ("non-AAPCS derived PCS variant");
4621 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4622 error ("variadic functions must use the base AAPCS variant");
4625 if (base_rules)
4626 return ARM_PCS_AAPCS;
4627 else if (user_convention)
4628 return user_pcs;
4629 else if (decl && flag_unit_at_a_time)
4631 /* Local functions never leak outside this compilation unit,
4632 so we are free to use whatever conventions are
4633 appropriate. */
4634 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4635 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4636 if (i && i->local)
4637 return ARM_PCS_AAPCS_LOCAL;
4640 else if (user_convention && user_pcs != arm_pcs_default)
4641 sorry ("PCS variant");
4643 /* For everything else we use the target's default. */
4644 return arm_pcs_default;
4648 static void
4649 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4650 const_tree fntype ATTRIBUTE_UNUSED,
4651 rtx libcall ATTRIBUTE_UNUSED,
4652 const_tree fndecl ATTRIBUTE_UNUSED)
4654 /* Record the unallocated VFP registers. */
4655 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4656 pcum->aapcs_vfp_reg_alloc = 0;
4659 /* Walk down the type tree of TYPE counting consecutive base elements.
4660 If *MODEP is VOIDmode, then set it to the first valid floating point
4661 type. If a non-floating point type is found, or if a floating point
4662 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4663 otherwise return the count in the sub-tree. */
4664 static int
4665 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4667 enum machine_mode mode;
4668 HOST_WIDE_INT size;
4670 switch (TREE_CODE (type))
4672 case REAL_TYPE:
4673 mode = TYPE_MODE (type);
4674 if (mode != DFmode && mode != SFmode)
4675 return -1;
4677 if (*modep == VOIDmode)
4678 *modep = mode;
4680 if (*modep == mode)
4681 return 1;
4683 break;
4685 case COMPLEX_TYPE:
4686 mode = TYPE_MODE (TREE_TYPE (type));
4687 if (mode != DFmode && mode != SFmode)
4688 return -1;
4690 if (*modep == VOIDmode)
4691 *modep = mode;
4693 if (*modep == mode)
4694 return 2;
4696 break;
4698 case VECTOR_TYPE:
4699 /* Use V2SImode and V4SImode as representatives of all 64-bit
4700 and 128-bit vector types, whether or not those modes are
4701 supported with the present options. */
4702 size = int_size_in_bytes (type);
4703 switch (size)
4705 case 8:
4706 mode = V2SImode;
4707 break;
4708 case 16:
4709 mode = V4SImode;
4710 break;
4711 default:
4712 return -1;
4715 if (*modep == VOIDmode)
4716 *modep = mode;
4718 /* Vector modes are considered to be opaque: two vectors are
4719 equivalent for the purposes of being homogeneous aggregates
4720 if they are the same size. */
4721 if (*modep == mode)
4722 return 1;
4724 break;
4726 case ARRAY_TYPE:
4728 int count;
4729 tree index = TYPE_DOMAIN (type);
4731 /* Can't handle incomplete types. */
4732 if (!COMPLETE_TYPE_P (type))
4733 return -1;
4735 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4736 if (count == -1
4737 || !index
4738 || !TYPE_MAX_VALUE (index)
4739 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4740 || !TYPE_MIN_VALUE (index)
4741 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4742 || count < 0)
4743 return -1;
4745 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4746 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4748 /* There must be no padding. */
4749 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4750 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4751 != count * GET_MODE_BITSIZE (*modep)))
4752 return -1;
4754 return count;
4757 case RECORD_TYPE:
4759 int count = 0;
4760 int sub_count;
4761 tree field;
4763 /* Can't handle incomplete types. */
4764 if (!COMPLETE_TYPE_P (type))
4765 return -1;
4767 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4769 if (TREE_CODE (field) != FIELD_DECL)
4770 continue;
4772 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4773 if (sub_count < 0)
4774 return -1;
4775 count += sub_count;
4778 /* There must be no padding. */
4779 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4780 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4781 != count * GET_MODE_BITSIZE (*modep)))
4782 return -1;
4784 return count;
4787 case UNION_TYPE:
4788 case QUAL_UNION_TYPE:
4790 /* These aren't very interesting except in a degenerate case. */
4791 int count = 0;
4792 int sub_count;
4793 tree field;
4795 /* Can't handle incomplete types. */
4796 if (!COMPLETE_TYPE_P (type))
4797 return -1;
4799 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4801 if (TREE_CODE (field) != FIELD_DECL)
4802 continue;
4804 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4805 if (sub_count < 0)
4806 return -1;
4807 count = count > sub_count ? count : sub_count;
4810 /* There must be no padding. */
4811 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4812 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4813 != count * GET_MODE_BITSIZE (*modep)))
4814 return -1;
4816 return count;
4819 default:
4820 break;
4823 return -1;
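/* Illustrative examples (assumption): for "struct { double x, y; }" the
   walk above returns 2 with *modep == DFmode (a homogeneous aggregate of
   two doubles), while "struct { float a; double b; }" returns -1 because
   the element modes differ.  */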
4826 /* Return true if PCS_VARIANT should use VFP registers. */
4827 static bool
4828 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4830 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4832 static bool seen_thumb1_vfp = false;
4834 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4836 sorry ("Thumb-1 hard-float VFP ABI");
4837 /* sorry() is not immediately fatal, so only display this once. */
4838 seen_thumb1_vfp = true;
4841 return true;
4844 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4845 return false;
4847 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4848 (TARGET_VFP_DOUBLE || !is_double));
4851 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4852 suitable for passing or returning in VFP registers for the PCS
4853 variant selected. If it is, then *BASE_MODE is updated to contain
4854 a machine mode describing each element of the argument's type and
4855 *COUNT to hold the number of such elements. */
4856 static bool
4857 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4858 enum machine_mode mode, const_tree type,
4859 enum machine_mode *base_mode, int *count)
4861 enum machine_mode new_mode = VOIDmode;
4863 /* If we have the type information, prefer that to working things
4864 out from the mode. */
4865 if (type)
4867 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4869 if (ag_count > 0 && ag_count <= 4)
4870 *count = ag_count;
4871 else
4872 return false;
4874 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4875 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4876 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4878 *count = 1;
4879 new_mode = mode;
4881 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4883 *count = 2;
4884 new_mode = (mode == DCmode ? DFmode : SFmode);
4886 else
4887 return false;
4890 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4891 return false;
4893 *base_mode = new_mode;
4894 return true;
4897 static bool
4898 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4899 enum machine_mode mode, const_tree type)
4901 int count ATTRIBUTE_UNUSED;
4902 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4904 if (!use_vfp_abi (pcs_variant, false))
4905 return false;
4906 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4907 &ag_mode, &count);
4910 static bool
4911 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4912 const_tree type)
4914 if (!use_vfp_abi (pcum->pcs_variant, false))
4915 return false;
4917 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4918 &pcum->aapcs_vfp_rmode,
4919 &pcum->aapcs_vfp_rcount);
4922 static bool
4923 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4924 const_tree type ATTRIBUTE_UNUSED)
4926 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4927 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4928 int regno;
4930 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4931 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4933 pcum->aapcs_vfp_reg_alloc = mask << regno;
4934 if (mode == BLKmode
4935 || (mode == TImode && ! TARGET_NEON)
4936 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4938 int i;
4939 int rcount = pcum->aapcs_vfp_rcount;
4940 int rshift = shift;
4941 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4942 rtx par;
4943 if (!TARGET_NEON)
4945 /* Avoid using unsupported vector modes. */
4946 if (rmode == V2SImode)
4947 rmode = DImode;
4948 else if (rmode == V4SImode)
4950 rmode = DImode;
4951 rcount *= 2;
4952 rshift /= 2;
4955 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4956 for (i = 0; i < rcount; i++)
4958 rtx tmp = gen_rtx_REG (rmode,
4959 FIRST_VFP_REGNUM + regno + i * rshift);
4960 tmp = gen_rtx_EXPR_LIST
4961 (VOIDmode, tmp,
4962 GEN_INT (i * GET_MODE_SIZE (rmode)));
4963 XVECEXP (par, 0, i) = tmp;
4966 pcum->aapcs_reg = par;
4968 else
4969 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4970 return true;
4972 return false;
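/* Illustrative note (assumption): for a homogeneous aggregate of two
   doubles, shift is 2 and the mask covers four single-precision slots,
   so the first free candidate is s0-s3 (d0-d1); the registers are then
   removed from aapcs_vfp_regs_free by aapcs_vfp_advance.  */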
4975 static rtx
4976 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4977 enum machine_mode mode,
4978 const_tree type ATTRIBUTE_UNUSED)
4980 if (!use_vfp_abi (pcs_variant, false))
4981 return NULL;
4983 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4985 int count;
4986 enum machine_mode ag_mode;
4987 int i;
4988 rtx par;
4989 int shift;
4991 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4992 &ag_mode, &count);
4994 if (!TARGET_NEON)
4996 if (ag_mode == V2SImode)
4997 ag_mode = DImode;
4998 else if (ag_mode == V4SImode)
5000 ag_mode = DImode;
5001 count *= 2;
5004 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5005 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5006 for (i = 0; i < count; i++)
5008 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5009 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5010 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5011 XVECEXP (par, 0, i) = tmp;
5014 return par;
5017 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5020 static void
5021 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5022 enum machine_mode mode ATTRIBUTE_UNUSED,
5023 const_tree type ATTRIBUTE_UNUSED)
5025 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5026 pcum->aapcs_vfp_reg_alloc = 0;
5027 return;
5030 #define AAPCS_CP(X) \
5032 aapcs_ ## X ## _cum_init, \
5033 aapcs_ ## X ## _is_call_candidate, \
5034 aapcs_ ## X ## _allocate, \
5035 aapcs_ ## X ## _is_return_candidate, \
5036 aapcs_ ## X ## _allocate_return_reg, \
5037 aapcs_ ## X ## _advance \
5040 /* Table of co-processors that can be used to pass arguments in
5041 registers. Ideally no argument should be a candidate for more than
5042 one co-processor table entry, but the table is processed in order
5043 and stops after the first match. If that entry then fails to put
5044 the argument into a co-processor register, the argument will go on
5045 the stack. */
5046 static struct
5048 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5049 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5051 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5052 BLKmode) is a candidate for this co-processor's registers; this
5053 function should ignore any position-dependent state in
5054 CUMULATIVE_ARGS and only use call-type dependent information. */
5055 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5057 /* Return true if the argument does get a co-processor register; it
5058 should set aapcs_reg to an RTX of the register allocated as is
5059 required for a return from FUNCTION_ARG. */
5060 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5062 /* Return true if a result of mode MODE (or type TYPE if MODE is
5063 BLKmode) can be returned in this co-processor's registers. */
5064 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5066 /* Allocate and return an RTX element to hold the return type of a
5067 call, this routine must not fail and will only be called if
5068 is_return_candidate returned true with the same parameters. */
5069 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5071 /* Finish processing this argument and prepare to start processing
5072 the next one. */
5073 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5074 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5076 AAPCS_CP(vfp)
5079 #undef AAPCS_CP
5081 static int
5082 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5083 const_tree type)
5085 int i;
5087 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5088 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5089 return i;
5091 return -1;
5094 static int
5095 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5097 /* We aren't passed a decl, so we can't check that a call is local.
5098 However, it isn't clear that that would be a win anyway, since it
5099 might limit some tail-calling opportunities. */
5100 enum arm_pcs pcs_variant;
5102 if (fntype)
5104 const_tree fndecl = NULL_TREE;
5106 if (TREE_CODE (fntype) == FUNCTION_DECL)
5108 fndecl = fntype;
5109 fntype = TREE_TYPE (fntype);
5112 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5114 else
5115 pcs_variant = arm_pcs_default;
5117 if (pcs_variant != ARM_PCS_AAPCS)
5119 int i;
5121 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5122 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5123 TYPE_MODE (type),
5124 type))
5125 return i;
5127 return -1;
5130 static rtx
5131 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5132 const_tree fntype)
5134 /* We aren't passed a decl, so we can't check that a call is local.
5135 However, it isn't clear that that would be a win anyway, since it
5136 might limit some tail-calling opportunities. */
5137 enum arm_pcs pcs_variant;
5138 int unsignedp ATTRIBUTE_UNUSED;
5140 if (fntype)
5142 const_tree fndecl = NULL_TREE;
5144 if (TREE_CODE (fntype) == FUNCTION_DECL)
5146 fndecl = fntype;
5147 fntype = TREE_TYPE (fntype);
5150 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5152 else
5153 pcs_variant = arm_pcs_default;
5155 /* Promote integer types. */
5156 if (type && INTEGRAL_TYPE_P (type))
5157 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5159 if (pcs_variant != ARM_PCS_AAPCS)
5161 int i;
5163 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5164 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5165 type))
5166 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5167 mode, type);
5170 /* Promotes small structs returned in a register to full-word size
5171 for big-endian AAPCS. */
5172 if (type && arm_return_in_msb (type))
5174 HOST_WIDE_INT size = int_size_in_bytes (type);
5175 if (size % UNITS_PER_WORD != 0)
5177 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5178 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5182 return gen_rtx_REG (mode, R0_REGNUM);
5185 static rtx
5186 aapcs_libcall_value (enum machine_mode mode)
5188 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5189 && GET_MODE_SIZE (mode) <= 4)
5190 mode = SImode;
5192 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5195 /* Lay out a function argument using the AAPCS rules. The rule
5196 numbers referred to here are those in the AAPCS. */
5197 static void
5198 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5199 const_tree type, bool named)
5201 int nregs, nregs2;
5202 int ncrn;
5204 /* We only need to do this once per argument. */
5205 if (pcum->aapcs_arg_processed)
5206 return;
5208 pcum->aapcs_arg_processed = true;
5210 /* Special case: if named is false then we are handling an incoming
5211 anonymous argument which is on the stack. */
5212 if (!named)
5213 return;
5215 /* Is this a potential co-processor register candidate? */
5216 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5218 int slot = aapcs_select_call_coproc (pcum, mode, type);
5219 pcum->aapcs_cprc_slot = slot;
5221 /* We don't have to apply any of the rules from part B of the
5222 preparation phase, these are handled elsewhere in the
5223 compiler. */
5225 if (slot >= 0)
5227 /* A Co-processor register candidate goes either in its own
5228 class of registers or on the stack. */
5229 if (!pcum->aapcs_cprc_failed[slot])
5231 /* C1.cp - Try to allocate the argument to co-processor
5232 registers. */
5233 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5234 return;
5236 /* C2.cp - Put the argument on the stack and note that we
5237 can't assign any more candidates in this slot. We also
5238 need to note that we have allocated stack space, so that
5239 we won't later try to split a non-cprc candidate between
5240 core registers and the stack. */
5241 pcum->aapcs_cprc_failed[slot] = true;
5242 pcum->can_split = false;
5245 /* We didn't get a register, so this argument goes on the
5246 stack. */
5247 gcc_assert (pcum->can_split == false);
5248 return;
5252 /* C3 - For double-word aligned arguments, round the NCRN up to the
5253 next even number. */
5254 ncrn = pcum->aapcs_ncrn;
5255 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5256 ncrn++;
5258 nregs = ARM_NUM_REGS2(mode, type);
5260 /* Sigh, this test should really assert that nregs > 0, but a GCC
5261 extension allows empty structs and then gives them empty size; it
5262 then allows such a structure to be passed by value. For some of
5263 the code below we have to pretend that such an argument has
5264 non-zero size so that we 'locate' it correctly either in
5265 registers or on the stack. */
5266 gcc_assert (nregs >= 0);
5268 nregs2 = nregs ? nregs : 1;
5270 /* C4 - Argument fits entirely in core registers. */
5271 if (ncrn + nregs2 <= NUM_ARG_REGS)
5273 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5274 pcum->aapcs_next_ncrn = ncrn + nregs;
5275 return;
5278 /* C5 - Some core registers left and there are no arguments already
5279 on the stack: split this argument between the remaining core
5280 registers and the stack. */
5281 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5283 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5284 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5285 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5286 return;
5289 /* C6 - NCRN is set to 4. */
5290 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5292 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5293 return;
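/* Illustrative example (assumption, base AAPCS rules with no co-processor
   candidate): after one "int" argument has taken r0, a following "double"
   is doubleword aligned, so rule C3 above bumps the NCRN from 1 to 2 and
   the double is passed in r2-r3; a further int then goes on the stack
   under C6/C7.  */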
5296 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5297 for a call to a function whose data type is FNTYPE.
5298 For a library call, FNTYPE is NULL. */
5299 void
5300 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5301 rtx libname,
5302 tree fndecl ATTRIBUTE_UNUSED)
5304 /* Long call handling. */
5305 if (fntype)
5306 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5307 else
5308 pcum->pcs_variant = arm_pcs_default;
5310 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5312 if (arm_libcall_uses_aapcs_base (libname))
5313 pcum->pcs_variant = ARM_PCS_AAPCS;
5315 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5316 pcum->aapcs_reg = NULL_RTX;
5317 pcum->aapcs_partial = 0;
5318 pcum->aapcs_arg_processed = false;
5319 pcum->aapcs_cprc_slot = -1;
5320 pcum->can_split = true;
5322 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5324 int i;
5326 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5328 pcum->aapcs_cprc_failed[i] = false;
5329 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5332 return;
5335 /* Legacy ABIs */
5337 /* On the ARM, the offset starts at 0. */
5338 pcum->nregs = 0;
5339 pcum->iwmmxt_nregs = 0;
5340 pcum->can_split = true;
5342 /* Varargs vectors are treated the same as long long.
5343 named_count avoids having to change the way arm handles 'named' */
5344 pcum->named_count = 0;
5345 pcum->nargs = 0;
5347 if (TARGET_REALLY_IWMMXT && fntype)
5349 tree fn_arg;
5351 for (fn_arg = TYPE_ARG_TYPES (fntype);
5352 fn_arg;
5353 fn_arg = TREE_CHAIN (fn_arg))
5354 pcum->named_count += 1;
5356 if (! pcum->named_count)
5357 pcum->named_count = INT_MAX;
5361 /* Return true if we use LRA instead of reload pass. */
5362 static bool
5363 arm_lra_p (void)
5365 return arm_lra_flag;
5368 /* Return true if mode/type need doubleword alignment. */
5369 static bool
5370 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5372 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5373 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5377 /* Determine where to put an argument to a function.
5378 Value is zero to push the argument on the stack,
5379 or a hard register in which to store the argument.
5381 MODE is the argument's machine mode.
5382 TYPE is the data type of the argument (as a tree).
5383 This is null for libcalls where that information may
5384 not be available.
5385 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5386 the preceding args and about the function being called.
5387 NAMED is nonzero if this argument is a named parameter
5388 (otherwise it is an extra parameter matching an ellipsis).
5390 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5391 other arguments are passed on the stack. If (NAMED == 0) (which happens
5392 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5393 defined), say it is passed in the stack (function_prologue will
5394 indeed make it pass in the stack if necessary). */
5396 static rtx
5397 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5398 const_tree type, bool named)
5400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5401 int nregs;
5403 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5404 a call insn (op3 of a call_value insn). */
5405 if (mode == VOIDmode)
5406 return const0_rtx;
5408 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5410 aapcs_layout_arg (pcum, mode, type, named);
5411 return pcum->aapcs_reg;
5414 /* Varargs vectors are treated the same as long long.
5415 named_count avoids having to change the way arm handles 'named' */
5416 if (TARGET_IWMMXT_ABI
5417 && arm_vector_mode_supported_p (mode)
5418 && pcum->named_count > pcum->nargs + 1)
5420 if (pcum->iwmmxt_nregs <= 9)
5421 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5422 else
5424 pcum->can_split = false;
5425 return NULL_RTX;
5429 /* Put doubleword aligned quantities in even register pairs. */
5430 if (pcum->nregs & 1
5431 && ARM_DOUBLEWORD_ALIGN
5432 && arm_needs_doubleword_align (mode, type))
5433 pcum->nregs++;
5435 /* Only allow splitting an arg between regs and memory if all preceding
5436 args were allocated to regs. For args passed by reference we only count
5437 the reference pointer. */
5438 if (pcum->can_split)
5439 nregs = 1;
5440 else
5441 nregs = ARM_NUM_REGS2 (mode, type);
5443 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5444 return NULL_RTX;
5446 return gen_rtx_REG (mode, pcum->nregs);
5449 static unsigned int
5450 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5452 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5453 ? DOUBLEWORD_ALIGNMENT
5454 : PARM_BOUNDARY);
5457 static int
5458 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5459 tree type, bool named)
5461 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5462 int nregs = pcum->nregs;
5464 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5466 aapcs_layout_arg (pcum, mode, type, named);
5467 return pcum->aapcs_partial;
5470 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5471 return 0;
5473 if (NUM_ARG_REGS > nregs
5474 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5475 && pcum->can_split)
5476 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5478 return 0;
5481 /* Update the data in PCUM to advance over an argument
5482 of mode MODE and data type TYPE.
5483 (TYPE is null for libcalls where that information may not be available.) */
5485 static void
5486 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5487 const_tree type, bool named)
5489 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5491 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5493 aapcs_layout_arg (pcum, mode, type, named);
5495 if (pcum->aapcs_cprc_slot >= 0)
5497 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5498 type);
5499 pcum->aapcs_cprc_slot = -1;
5502 /* Generic stuff. */
5503 pcum->aapcs_arg_processed = false;
5504 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5505 pcum->aapcs_reg = NULL_RTX;
5506 pcum->aapcs_partial = 0;
5508 else
5510 pcum->nargs += 1;
5511 if (arm_vector_mode_supported_p (mode)
5512 && pcum->named_count > pcum->nargs
5513 && TARGET_IWMMXT_ABI)
5514 pcum->iwmmxt_nregs += 1;
5515 else
5516 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5520 /* Variable sized types are passed by reference. This is a GCC
5521 extension to the ARM ABI. */
5523 static bool
5524 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5525 enum machine_mode mode ATTRIBUTE_UNUSED,
5526 const_tree type, bool named ATTRIBUTE_UNUSED)
5528 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5531 /* Encode the current state of the #pragma [no_]long_calls. */
5532 typedef enum
5534 OFF, /* No #pragma [no_]long_calls is in effect. */
5535 LONG, /* #pragma long_calls is in effect. */
5536 SHORT /* #pragma no_long_calls is in effect. */
5537 } arm_pragma_enum;
5539 static arm_pragma_enum arm_pragma_long_calls = OFF;
5541 void
5542 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5544 arm_pragma_long_calls = LONG;
5547 void
5548 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5550 arm_pragma_long_calls = SHORT;
5553 void
5554 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5556 arm_pragma_long_calls = OFF;
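/* Editor's sketch (hypothetical declarations, not part of GCC) of how
   the pragmas handled above are typically used; functions declared
   inside each region pick up the corresponding default attribute, see
   arm_set_default_type_attributes further down.  */
#pragma long_calls
extern void far_rom_service (void);     /* defaults to "long_call"  */
#pragma no_long_calls
extern void near_ram_helper (void);     /* defaults to "short_call" */
#pragma long_calls_off
extern void ordinary_function (void);   /* back to the command-line default */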
5559 /* Handle an attribute requiring a FUNCTION_DECL;
5560 arguments as in struct attribute_spec.handler. */
5561 static tree
5562 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5563 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5565 if (TREE_CODE (*node) != FUNCTION_DECL)
5567 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5568 name);
5569 *no_add_attrs = true;
5572 return NULL_TREE;
5575 /* Handle an "interrupt" or "isr" attribute;
5576 arguments as in struct attribute_spec.handler. */
5577 static tree
5578 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5579 bool *no_add_attrs)
5581 if (DECL_P (*node))
5583 if (TREE_CODE (*node) != FUNCTION_DECL)
5585 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5586 name);
5587 *no_add_attrs = true;
5589 /* FIXME: the argument if any is checked for type attributes;
5590 should it be checked for decl ones? */
5592 else
5594 if (TREE_CODE (*node) == FUNCTION_TYPE
5595 || TREE_CODE (*node) == METHOD_TYPE)
5597 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5599 warning (OPT_Wattributes, "%qE attribute ignored",
5600 name);
5601 *no_add_attrs = true;
5604 else if (TREE_CODE (*node) == POINTER_TYPE
5605 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5606 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5607 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5609 *node = build_variant_type_copy (*node);
5610 TREE_TYPE (*node) = build_type_attribute_variant
5611 (TREE_TYPE (*node),
5612 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5613 *no_add_attrs = true;
5615 else
5617 /* Possibly pass this attribute on from the type to a decl. */
5618 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5619 | (int) ATTR_FLAG_FUNCTION_NEXT
5620 | (int) ATTR_FLAG_ARRAY_NEXT))
5622 *no_add_attrs = true;
5623 return tree_cons (name, args, NULL_TREE);
5625 else
5627 warning (OPT_Wattributes, "%qE attribute ignored",
5628 name);
5633 return NULL_TREE;
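/* Editor's example (illustrative only): the kind of declaration the
   handler above validates; "IRQ" is assumed to be one of the interrupt
   kinds accepted by arm_isr_value.  */
void uart_rx_handler (void) __attribute__ ((interrupt ("IRQ")));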
5636 /* Handle a "pcs" attribute; arguments as in struct
5637 attribute_spec.handler. */
5638 static tree
5639 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5640 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5642 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5644 warning (OPT_Wattributes, "%qE attribute ignored", name);
5645 *no_add_attrs = true;
5647 return NULL_TREE;
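/* Editor's example (illustrative only): a declaration using the "pcs"
   attribute checked above; "aapcs" and "aapcs-vfp" are assumed to be
   the variant names arm_pcs_from_attribute recognizes.  */
extern double vfp_scale (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));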
5650 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5651 /* Handle the "notshared" attribute. This attribute is another way of
5652 requesting hidden visibility. ARM's compiler supports
5653 "__declspec(notshared)"; we support the same thing via an
5654 attribute. */
5656 static tree
5657 arm_handle_notshared_attribute (tree *node,
5658 tree name ATTRIBUTE_UNUSED,
5659 tree args ATTRIBUTE_UNUSED,
5660 int flags ATTRIBUTE_UNUSED,
5661 bool *no_add_attrs)
5663 tree decl = TYPE_NAME (*node);
5665 if (decl)
5667 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5668 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5669 *no_add_attrs = false;
5671 return NULL_TREE;
5673 #endif
5675 /* Return 0 if the attributes for two types are incompatible, 1 if they
5676 are compatible, and 2 if they are nearly compatible (which causes a
5677 warning to be generated). */
5678 static int
5679 arm_comp_type_attributes (const_tree type1, const_tree type2)
5681 int l1, l2, s1, s2;
5683 /* Check for mismatch of non-default calling convention. */
5684 if (TREE_CODE (type1) != FUNCTION_TYPE)
5685 return 1;
5687 /* Check for mismatched call attributes. */
5688 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5689 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5690 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5691 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5693 /* Only bother to check if an attribute is defined. */
5694 if (l1 | l2 | s1 | s2)
5696 /* If one type has an attribute, the other must have the same attribute. */
5697 if ((l1 != l2) || (s1 != s2))
5698 return 0;
5700 /* Disallow mixed attributes. */
5701 if ((l1 & s2) || (l2 & s1))
5702 return 0;
5705 /* Check for mismatched ISR attribute. */
5706 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5707 if (! l1)
5708 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5709 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5710 if (! l2)
5711 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5712 if (l1 != l2)
5713 return 0;
5715 return 1;
5718 /* Assigns default attributes to a newly defined type. This is used to
5719 set short_call/long_call attributes for function types of
5720 functions defined inside corresponding #pragma scopes. */
5721 static void
5722 arm_set_default_type_attributes (tree type)
5724 /* Add __attribute__ ((long_call)) to all functions when
5725 inside #pragma long_calls, or __attribute__ ((short_call))
5726 when inside #pragma no_long_calls. */
5727 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5729 tree type_attr_list, attr_name;
5730 type_attr_list = TYPE_ATTRIBUTES (type);
5732 if (arm_pragma_long_calls == LONG)
5733 attr_name = get_identifier ("long_call");
5734 else if (arm_pragma_long_calls == SHORT)
5735 attr_name = get_identifier ("short_call");
5736 else
5737 return;
5739 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5740 TYPE_ATTRIBUTES (type) = type_attr_list;
5744 /* Return true if DECL is known to be linked into section SECTION. */
5746 static bool
5747 arm_function_in_section_p (tree decl, section *section)
5749 /* We can only be certain about functions defined in the same
5750 compilation unit. */
5751 if (!TREE_STATIC (decl))
5752 return false;
5754 /* Make sure that SYMBOL always binds to the definition in this
5755 compilation unit. */
5756 if (!targetm.binds_local_p (decl))
5757 return false;
5759 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5760 if (!DECL_SECTION_NAME (decl))
5762 /* Make sure that we will not create a unique section for DECL. */
5763 if (flag_function_sections || DECL_ONE_ONLY (decl))
5764 return false;
5767 return function_section (decl) == section;
5770 /* Return nonzero if a 32-bit "long_call" should be generated for
5771 a call from the current function to DECL. We generate a long_call
5772 if the function:
5774 a. has an __attribute__ ((long_call))
5775 or b. is within the scope of a #pragma long_calls
5776 or c. the -mlong-calls command line switch has been specified
5778 However we do not generate a long call if the function:
5780 d. has an __attribute__ ((short_call))
5781 or e. is inside the scope of a #pragma no_long_calls
5782 or f. is defined in the same section as the current function. */
5784 bool
5785 arm_is_long_call_p (tree decl)
5787 tree attrs;
5789 if (!decl)
5790 return TARGET_LONG_CALLS;
5792 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5793 if (lookup_attribute ("short_call", attrs))
5794 return false;
5796 /* For "f", be conservative, and only cater for cases in which the
5797 whole of the current function is placed in the same section. */
5798 if (!flag_reorder_blocks_and_partition
5799 && TREE_CODE (decl) == FUNCTION_DECL
5800 && arm_function_in_section_p (decl, current_function_section ()))
5801 return false;
5803 if (lookup_attribute ("long_call", attrs))
5804 return true;
5806 return TARGET_LONG_CALLS;
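/* Editor's example (illustrative only): per-function overrides of the
   decision made above, independent of -mlong-calls.  */
extern void flash_resident_service (void) __attribute__ ((long_call));
extern void same_section_helper (void) __attribute__ ((short_call));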
5809 /* Return nonzero if it is ok to make a tail-call to DECL. */
5810 static bool
5811 arm_function_ok_for_sibcall (tree decl, tree exp)
5813 unsigned long func_type;
5815 if (cfun->machine->sibcall_blocked)
5816 return false;
5818 /* Never tailcall something if we are generating code for Thumb-1. */
5819 if (TARGET_THUMB1)
5820 return false;
5822 /* The PIC register is live on entry to VxWorks PLT entries, so we
5823 must make the call before restoring the PIC register. */
5824 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5825 return false;
5827 /* Cannot tail-call to long calls, since these are out of range of
5828 a branch instruction. */
5829 if (decl && arm_is_long_call_p (decl))
5830 return false;
5832 /* If we are interworking and the function is not declared static
5833 then we can't tail-call it unless we know that it exists in this
5834 compilation unit (since it might be a Thumb routine). */
5835 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5836 && !TREE_ASM_WRITTEN (decl))
5837 return false;
5839 func_type = arm_current_func_type ();
5840 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5841 if (IS_INTERRUPT (func_type))
5842 return false;
5844 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5846 /* Check that the return value locations are the same. For
5847 example that we aren't returning a value from the sibling in
5848 a VFP register but then need to transfer it to a core
5849 register. */
5850 rtx a, b;
5852 a = arm_function_value (TREE_TYPE (exp), decl, false);
5853 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5854 cfun->decl, false);
5855 if (!rtx_equal_p (a, b))
5856 return false;
5859 /* Never tailcall if function may be called with a misaligned SP. */
5860 if (IS_STACKALIGN (func_type))
5861 return false;
5863 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5864 references should become a NOP. Don't convert such calls into
5865 sibling calls. */
5866 if (TARGET_AAPCS_BASED
5867 && arm_abi == ARM_ABI_AAPCS
5868 && decl
5869 && DECL_WEAK (decl))
5870 return false;
5872 /* Everything else is ok. */
5873 return true;
5877 /* Addressing mode support functions. */
5879 /* Return nonzero if X is a legitimate immediate operand when compiling
5880 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5882 legitimate_pic_operand_p (rtx x)
5884 if (GET_CODE (x) == SYMBOL_REF
5885 || (GET_CODE (x) == CONST
5886 && GET_CODE (XEXP (x, 0)) == PLUS
5887 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5888 return 0;
5890 return 1;
5893 /* Record that the current function needs a PIC register. Initialize
5894 cfun->machine->pic_reg if we have not already done so. */
5896 static void
5897 require_pic_register (void)
5899 /* A lot of the logic here is made obscure by the fact that this
5900 routine gets called as part of the rtx cost estimation process.
5901 We don't want those calls to affect any assumptions about the real
5902 function; and further, we can't call entry_of_function() until we
5903 start the real expansion process. */
5904 if (!crtl->uses_pic_offset_table)
5906 gcc_assert (can_create_pseudo_p ());
5907 if (arm_pic_register != INVALID_REGNUM)
5909 if (!cfun->machine->pic_reg)
5910 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5912 /* Play games to avoid marking the function as needing pic
5913 if we are being called as part of the cost-estimation
5914 process. */
5915 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5916 crtl->uses_pic_offset_table = 1;
5918 else
5920 rtx seq, insn;
5922 if (!cfun->machine->pic_reg)
5923 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5925 /* Play games to avoid marking the function as needing pic
5926 if we are being called as part of the cost-estimation
5927 process. */
5928 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5930 crtl->uses_pic_offset_table = 1;
5931 start_sequence ();
5933 arm_load_pic_register (0UL);
5935 seq = get_insns ();
5936 end_sequence ();
5938 for (insn = seq; insn; insn = NEXT_INSN (insn))
5939 if (INSN_P (insn))
5940 INSN_LOCATION (insn) = prologue_location;
5942 /* We can be called during expansion of PHI nodes, where
5943 we can't yet emit instructions directly in the final
5944 insn stream. Queue the insns on the entry edge, they will
5945 be committed after everything else is expanded. */
5946 insert_insn_on_edge (seq,
5947 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
5954 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5956 if (GET_CODE (orig) == SYMBOL_REF
5957 || GET_CODE (orig) == LABEL_REF)
5959 rtx insn;
5961 if (reg == 0)
5963 gcc_assert (can_create_pseudo_p ());
5964 reg = gen_reg_rtx (Pmode);
5967 /* VxWorks does not impose a fixed gap between segments; the run-time
5968 gap can be different from the object-file gap. We therefore can't
5969 use GOTOFF unless we are absolutely sure that the symbol is in the
5970 same segment as the GOT. Unfortunately, the flexibility of linker
5971 scripts means that we can't be sure of that in general, so assume
5972 that GOTOFF is never valid on VxWorks. */
5973 if ((GET_CODE (orig) == LABEL_REF
5974 || (GET_CODE (orig) == SYMBOL_REF &&
5975 SYMBOL_REF_LOCAL_P (orig)))
5976 && NEED_GOT_RELOC
5977 && arm_pic_data_is_text_relative)
5978 insn = arm_pic_static_addr (orig, reg);
5979 else
5981 rtx pat;
5982 rtx mem;
5984 /* If this function doesn't have a pic register, create one now. */
5985 require_pic_register ();
5987 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5989 /* Make the MEM as close to a constant as possible. */
5990 mem = SET_SRC (pat);
5991 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5992 MEM_READONLY_P (mem) = 1;
5993 MEM_NOTRAP_P (mem) = 1;
5995 insn = emit_insn (pat);
5998 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5999 by loop. */
6000 set_unique_reg_note (insn, REG_EQUAL, orig);
6002 return reg;
6004 else if (GET_CODE (orig) == CONST)
6006 rtx base, offset;
6008 if (GET_CODE (XEXP (orig, 0)) == PLUS
6009 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6010 return orig;
6012 /* Handle the case where we have: const (UNSPEC_TLS). */
6013 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6014 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6015 return orig;
6017 /* Handle the case where we have:
6018 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6019 CONST_INT. */
6020 if (GET_CODE (XEXP (orig, 0)) == PLUS
6021 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6022 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6024 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6025 return orig;
6028 if (reg == 0)
6030 gcc_assert (can_create_pseudo_p ());
6031 reg = gen_reg_rtx (Pmode);
6034 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6036 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6037 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6038 base == reg ? 0 : reg);
6040 if (CONST_INT_P (offset))
6042 /* The base register doesn't really matter; we only want to
6043 test the index for the appropriate mode. */
6044 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6046 gcc_assert (can_create_pseudo_p ());
6047 offset = force_reg (Pmode, offset);
6050 if (CONST_INT_P (offset))
6051 return plus_constant (Pmode, base, INTVAL (offset));
6054 if (GET_MODE_SIZE (mode) > 4
6055 && (GET_MODE_CLASS (mode) == MODE_INT
6056 || TARGET_SOFT_FLOAT))
6058 emit_insn (gen_addsi3 (reg, base, offset));
6059 return reg;
6062 return gen_rtx_PLUS (Pmode, base, offset);
6065 return orig;
6069 /* Find a spare register to use during the prolog of a function. */
6071 static int
6072 thumb_find_work_register (unsigned long pushed_regs_mask)
6074 int reg;
6076 /* Check the argument registers first as these are call-used. The
6077 register allocation order means that sometimes r3 might be used
6078 but earlier argument registers might not, so check them all. */
6079 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6080 if (!df_regs_ever_live_p (reg))
6081 return reg;
6083 /* Before going on to check the call-saved registers we can try a couple
6084 more ways of deducing that r3 is available. The first is when we are
6085 pushing anonymous arguments onto the stack and we have less than 4
6086 registers' worth of fixed arguments (*). In this case r3 will be part of
6087 the variable argument list and so we can be sure that it will be
6088 pushed right at the start of the function. Hence it will be available
6089 for the rest of the prologue.
6090 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6091 if (cfun->machine->uses_anonymous_args
6092 && crtl->args.pretend_args_size > 0)
6093 return LAST_ARG_REGNUM;
6095 /* The other case is when we have fixed arguments but less than 4 registers
6096 worth. In this case r3 might be used in the body of the function, but
6097 it is not being used to convey an argument into the function. In theory
6098 we could just check crtl->args.size to see how many bytes are
6099 being passed in argument registers, but it seems that it is unreliable.
6100 Sometimes it will have the value 0 when in fact arguments are being
6101 passed. (See testcase execute/20021111-1.c for an example). So we also
6102 check the args_info.nregs field as well. The problem with this field is
6103 that it makes no allowances for arguments that are passed to the
6104 function but which are not used. Hence we could miss an opportunity
6105 when a function has an unused argument in r3. But it is better to be
6106 safe than to be sorry. */
6107 if (! cfun->machine->uses_anonymous_args
6108 && crtl->args.size >= 0
6109 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6110 && (TARGET_AAPCS_BASED
6111 ? crtl->args.info.aapcs_ncrn < 4
6112 : crtl->args.info.nregs < 4))
6113 return LAST_ARG_REGNUM;
6115 /* Otherwise look for a call-saved register that is going to be pushed. */
6116 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6117 if (pushed_regs_mask & (1 << reg))
6118 return reg;
6120 if (TARGET_THUMB2)
6122 /* Thumb-2 can use high regs. */
6123 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6124 if (pushed_regs_mask & (1 << reg))
6125 return reg;
6127 /* Something went wrong - thumb_compute_save_reg_mask()
6128 should have arranged for a suitable register to be pushed. */
6129 gcc_unreachable ();
6132 static GTY(()) int pic_labelno;
6134 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6135 low register. */
6137 void
6138 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6140 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6142 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6143 return;
6145 gcc_assert (flag_pic);
6147 pic_reg = cfun->machine->pic_reg;
6148 if (TARGET_VXWORKS_RTP)
6150 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6151 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6152 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6154 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6156 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6157 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6159 else
6161 /* We use an UNSPEC rather than a LABEL_REF because this label
6162 never appears in the code stream. */
6164 labelno = GEN_INT (pic_labelno++);
6165 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6166 l1 = gen_rtx_CONST (VOIDmode, l1);
6168 /* On the ARM the PC register contains 'dot + 8' at the time of the
6169 addition; on the Thumb it is 'dot + 4'. */
6170 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6171 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6172 UNSPEC_GOTSYM_OFF);
6173 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6175 if (TARGET_32BIT)
6177 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6179 else /* TARGET_THUMB1 */
6181 if (arm_pic_register != INVALID_REGNUM
6182 && REGNO (pic_reg) > LAST_LO_REGNUM)
6184 /* We will have pushed the pic register, so we should always be
6185 able to find a work register. */
6186 pic_tmp = gen_rtx_REG (SImode,
6187 thumb_find_work_register (saved_regs));
6188 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6189 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6190 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6192 else
6193 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6197 /* Need to emit this whether or not we obey regdecls,
6198 since setjmp/longjmp can cause life info to screw up. */
6199 emit_use (pic_reg);
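/* Editor's sketch of the sequence assumed to be emitted above for ARM
   state on a non-VxWorks ELF target (illustrative; register and label
   names are made up):
       ldr     rPIC, .LCn      @ .LCn: .word _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)
   .LPICm:
       add     rPIC, pc, rPIC
   which is why the constant built above folds in "dot + 8" for ARM
   state and "dot + 4" for Thumb state.  */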
6202 /* Generate code to load the address of a static var when flag_pic is set. */
6203 static rtx
6204 arm_pic_static_addr (rtx orig, rtx reg)
6206 rtx l1, labelno, offset_rtx, insn;
6208 gcc_assert (flag_pic);
6210 /* We use an UNSPEC rather than a LABEL_REF because this label
6211 never appears in the code stream. */
6212 labelno = GEN_INT (pic_labelno++);
6213 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6214 l1 = gen_rtx_CONST (VOIDmode, l1);
6216 /* On the ARM the PC register contains 'dot + 8' at the time of the
6217 addition; on the Thumb it is 'dot + 4'. */
6218 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6219 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6220 UNSPEC_SYMBOL_OFFSET);
6221 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6223 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6224 return insn;
6227 /* Return nonzero if X is valid as an ARM state addressing register. */
6228 static int
6229 arm_address_register_rtx_p (rtx x, int strict_p)
6231 int regno;
6233 if (!REG_P (x))
6234 return 0;
6236 regno = REGNO (x);
6238 if (strict_p)
6239 return ARM_REGNO_OK_FOR_BASE_P (regno);
6241 return (regno <= LAST_ARM_REGNUM
6242 || regno >= FIRST_PSEUDO_REGISTER
6243 || regno == FRAME_POINTER_REGNUM
6244 || regno == ARG_POINTER_REGNUM);
6247 /* Return TRUE if this rtx is the difference of a symbol and a label,
6248 and will reduce to a PC-relative relocation in the object file.
6249 Expressions like this can be left alone when generating PIC, rather
6250 than forced through the GOT. */
6251 static int
6252 pcrel_constant_p (rtx x)
6254 if (GET_CODE (x) == MINUS)
6255 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6257 return FALSE;
6260 /* Return true if X will surely end up in an index register after next
6261 splitting pass. */
6262 static bool
6263 will_be_in_index_register (const_rtx x)
6265 /* arm.md: calculate_pic_address will split this into a register. */
6266 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6269 /* Return nonzero if X is a valid ARM state address operand. */
6271 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6272 int strict_p)
6274 bool use_ldrd;
6275 enum rtx_code code = GET_CODE (x);
6277 if (arm_address_register_rtx_p (x, strict_p))
6278 return 1;
6280 use_ldrd = (TARGET_LDRD
6281 && (mode == DImode
6282 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6284 if (code == POST_INC || code == PRE_DEC
6285 || ((code == PRE_INC || code == POST_DEC)
6286 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6287 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6289 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6290 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6291 && GET_CODE (XEXP (x, 1)) == PLUS
6292 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6294 rtx addend = XEXP (XEXP (x, 1), 1);
6296 /* Don't allow ldrd post increment by register because it's hard
6297 to fix up invalid register choices. */
6298 if (use_ldrd
6299 && GET_CODE (x) == POST_MODIFY
6300 && REG_P (addend))
6301 return 0;
6303 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6304 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6307 /* After reload constants split into minipools will have addresses
6308 from a LABEL_REF. */
6309 else if (reload_completed
6310 && (code == LABEL_REF
6311 || (code == CONST
6312 && GET_CODE (XEXP (x, 0)) == PLUS
6313 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6314 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6315 return 1;
6317 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6318 return 0;
6320 else if (code == PLUS)
6322 rtx xop0 = XEXP (x, 0);
6323 rtx xop1 = XEXP (x, 1);
6325 return ((arm_address_register_rtx_p (xop0, strict_p)
6326 && ((CONST_INT_P (xop1)
6327 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6328 || (!strict_p && will_be_in_index_register (xop1))))
6329 || (arm_address_register_rtx_p (xop1, strict_p)
6330 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6333 #if 0
6334 /* Reload currently can't handle MINUS, so disable this for now */
6335 else if (GET_CODE (x) == MINUS)
6337 rtx xop0 = XEXP (x, 0);
6338 rtx xop1 = XEXP (x, 1);
6340 return (arm_address_register_rtx_p (xop0, strict_p)
6341 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6343 #endif
6345 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6346 && code == SYMBOL_REF
6347 && CONSTANT_POOL_ADDRESS_P (x)
6348 && ! (flag_pic
6349 && symbol_mentioned_p (get_pool_constant (x))
6350 && ! pcrel_constant_p (get_pool_constant (x))))
6351 return 1;
6353 return 0;
6356 /* Return nonzero if X is a valid Thumb-2 address operand. */
6357 static int
6358 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6360 bool use_ldrd;
6361 enum rtx_code code = GET_CODE (x);
6363 if (arm_address_register_rtx_p (x, strict_p))
6364 return 1;
6366 use_ldrd = (TARGET_LDRD
6367 && (mode == DImode
6368 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6370 if (code == POST_INC || code == PRE_DEC
6371 || ((code == PRE_INC || code == POST_DEC)
6372 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6373 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6375 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6376 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6377 && GET_CODE (XEXP (x, 1)) == PLUS
6378 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6380 /* Thumb-2 only has autoincrement by constant. */
6381 rtx addend = XEXP (XEXP (x, 1), 1);
6382 HOST_WIDE_INT offset;
6384 if (!CONST_INT_P (addend))
6385 return 0;
6387 offset = INTVAL(addend);
6388 if (GET_MODE_SIZE (mode) <= 4)
6389 return (offset > -256 && offset < 256);
6391 return (use_ldrd && offset > -1024 && offset < 1024
6392 && (offset & 3) == 0);
6395 /* After reload constants split into minipools will have addresses
6396 from a LABEL_REF. */
6397 else if (reload_completed
6398 && (code == LABEL_REF
6399 || (code == CONST
6400 && GET_CODE (XEXP (x, 0)) == PLUS
6401 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6402 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6403 return 1;
6405 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6406 return 0;
6408 else if (code == PLUS)
6410 rtx xop0 = XEXP (x, 0);
6411 rtx xop1 = XEXP (x, 1);
6413 return ((arm_address_register_rtx_p (xop0, strict_p)
6414 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6415 || (!strict_p && will_be_in_index_register (xop1))))
6416 || (arm_address_register_rtx_p (xop1, strict_p)
6417 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6420 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6421 && code == SYMBOL_REF
6422 && CONSTANT_POOL_ADDRESS_P (x)
6423 && ! (flag_pic
6424 && symbol_mentioned_p (get_pool_constant (x))
6425 && ! pcrel_constant_p (get_pool_constant (x))))
6426 return 1;
6428 return 0;
6431 /* Return nonzero if INDEX is valid for an address index operand in
6432 ARM state. */
6433 static int
6434 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6435 int strict_p)
6437 HOST_WIDE_INT range;
6438 enum rtx_code code = GET_CODE (index);
6440 /* Standard coprocessor addressing modes. */
6441 if (TARGET_HARD_FLOAT
6442 && TARGET_VFP
6443 && (mode == SFmode || mode == DFmode))
6444 return (code == CONST_INT && INTVAL (index) < 1024
6445 && INTVAL (index) > -1024
6446 && (INTVAL (index) & 3) == 0);
6448 /* For quad modes, we restrict the constant offset to be slightly less
6449 than what the instruction format permits. We do this because for
6450 quad mode moves, we will actually decompose them into two separate
6451 double-mode reads or writes. INDEX must therefore be a valid
6452 (double-mode) offset and so should INDEX+8. */
6453 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6454 return (code == CONST_INT
6455 && INTVAL (index) < 1016
6456 && INTVAL (index) > -1024
6457 && (INTVAL (index) & 3) == 0);
6459 /* We have no such constraint on double mode offsets, so we permit the
6460 full range of the instruction format. */
6461 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6462 return (code == CONST_INT
6463 && INTVAL (index) < 1024
6464 && INTVAL (index) > -1024
6465 && (INTVAL (index) & 3) == 0);
6467 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6468 return (code == CONST_INT
6469 && INTVAL (index) < 1024
6470 && INTVAL (index) > -1024
6471 && (INTVAL (index) & 3) == 0);
6473 if (arm_address_register_rtx_p (index, strict_p)
6474 && (GET_MODE_SIZE (mode) <= 4))
6475 return 1;
6477 if (mode == DImode || mode == DFmode)
6479 if (code == CONST_INT)
6481 HOST_WIDE_INT val = INTVAL (index);
6483 if (TARGET_LDRD)
6484 return val > -256 && val < 256;
6485 else
6486 return val > -4096 && val < 4092;
6489 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6492 if (GET_MODE_SIZE (mode) <= 4
6493 && ! (arm_arch4
6494 && (mode == HImode
6495 || mode == HFmode
6496 || (mode == QImode && outer == SIGN_EXTEND))))
6498 if (code == MULT)
6500 rtx xiop0 = XEXP (index, 0);
6501 rtx xiop1 = XEXP (index, 1);
6503 return ((arm_address_register_rtx_p (xiop0, strict_p)
6504 && power_of_two_operand (xiop1, SImode))
6505 || (arm_address_register_rtx_p (xiop1, strict_p)
6506 && power_of_two_operand (xiop0, SImode)));
6508 else if (code == LSHIFTRT || code == ASHIFTRT
6509 || code == ASHIFT || code == ROTATERT)
6511 rtx op = XEXP (index, 1);
6513 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6514 && CONST_INT_P (op)
6515 && INTVAL (op) > 0
6516 && INTVAL (op) <= 31);
6520 /* For ARM v4 we may be doing a sign-extend operation during the
6521 load. */
6522 if (arm_arch4)
6524 if (mode == HImode
6525 || mode == HFmode
6526 || (outer == SIGN_EXTEND && mode == QImode))
6527 range = 256;
6528 else
6529 range = 4096;
6531 else
6532 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6534 return (code == CONST_INT
6535 && INTVAL (index) < range
6536 && INTVAL (index) > -range);
6539 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6540 index operand. i.e. 1, 2, 4 or 8. */
6541 static bool
6542 thumb2_index_mul_operand (rtx op)
6544 HOST_WIDE_INT val;
6546 if (!CONST_INT_P (op))
6547 return false;
6549 val = INTVAL(op);
6550 return (val == 1 || val == 2 || val == 4 || val == 8);
6553 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6554 static int
6555 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6557 enum rtx_code code = GET_CODE (index);
6559 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6560 /* Standard coprocessor addressing modes. */
6561 if (TARGET_HARD_FLOAT
6562 && TARGET_VFP
6563 && (mode == SFmode || mode == DFmode))
6564 return (code == CONST_INT && INTVAL (index) < 1024
6565 /* Thumb-2 allows only > -256 index range for its core register
6566 load/stores. Since we allow SF/DF in core registers, we have
6567 to use the intersection between -256~4096 (core) and -1024~1024
6568 (coprocessor). */
6569 && INTVAL (index) > -256
6570 && (INTVAL (index) & 3) == 0);
6572 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6574 /* For DImode assume values will usually live in core regs
6575 and only allow LDRD addressing modes. */
6576 if (!TARGET_LDRD || mode != DImode)
6577 return (code == CONST_INT
6578 && INTVAL (index) < 1024
6579 && INTVAL (index) > -1024
6580 && (INTVAL (index) & 3) == 0);
6583 /* For quad modes, we restrict the constant offset to be slightly less
6584 than what the instruction format permits. We do this because for
6585 quad mode moves, we will actually decompose them into two separate
6586 double-mode reads or writes. INDEX must therefore be a valid
6587 (double-mode) offset and so should INDEX+8. */
6588 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6589 return (code == CONST_INT
6590 && INTVAL (index) < 1016
6591 && INTVAL (index) > -1024
6592 && (INTVAL (index) & 3) == 0);
6594 /* We have no such constraint on double mode offsets, so we permit the
6595 full range of the instruction format. */
6596 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6597 return (code == CONST_INT
6598 && INTVAL (index) < 1024
6599 && INTVAL (index) > -1024
6600 && (INTVAL (index) & 3) == 0);
6602 if (arm_address_register_rtx_p (index, strict_p)
6603 && (GET_MODE_SIZE (mode) <= 4))
6604 return 1;
6606 if (mode == DImode || mode == DFmode)
6608 if (code == CONST_INT)
6610 HOST_WIDE_INT val = INTVAL (index);
6611 /* ??? Can we assume ldrd for thumb2? */
6612 /* Thumb-2 ldrd only has reg+const addressing modes. */
6613 /* ldrd supports offsets of +-1020.
6614 However the ldr fallback does not. */
6615 return val > -256 && val < 256 && (val & 3) == 0;
6617 else
6618 return 0;
6621 if (code == MULT)
6623 rtx xiop0 = XEXP (index, 0);
6624 rtx xiop1 = XEXP (index, 1);
6626 return ((arm_address_register_rtx_p (xiop0, strict_p)
6627 && thumb2_index_mul_operand (xiop1))
6628 || (arm_address_register_rtx_p (xiop1, strict_p)
6629 && thumb2_index_mul_operand (xiop0)));
6631 else if (code == ASHIFT)
6633 rtx op = XEXP (index, 1);
6635 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6636 && CONST_INT_P (op)
6637 && INTVAL (op) > 0
6638 && INTVAL (op) <= 3);
6641 return (code == CONST_INT
6642 && INTVAL (index) < 4096
6643 && INTVAL (index) > -256);
6646 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6647 static int
6648 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6650 int regno;
6652 if (!REG_P (x))
6653 return 0;
6655 regno = REGNO (x);
6657 if (strict_p)
6658 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6660 return (regno <= LAST_LO_REGNUM
6661 || regno > LAST_VIRTUAL_REGISTER
6662 || regno == FRAME_POINTER_REGNUM
6663 || (GET_MODE_SIZE (mode) >= 4
6664 && (regno == STACK_POINTER_REGNUM
6665 || regno >= FIRST_PSEUDO_REGISTER
6666 || x == hard_frame_pointer_rtx
6667 || x == arg_pointer_rtx)));
6670 /* Return nonzero if x is a legitimate index register. This is the case
6671 for any base register that can access a QImode object. */
6672 inline static int
6673 thumb1_index_register_rtx_p (rtx x, int strict_p)
6675 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6678 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6680 The AP may be eliminated to either the SP or the FP, so we use the
6681 least common denominator, e.g. SImode, and offsets from 0 to 64.
6683 ??? Verify whether the above is the right approach.
6685 ??? Also, the FP may be eliminated to the SP, so perhaps that
6686 needs special handling also.
6688 ??? Look at how the mips16 port solves this problem. It probably uses
6689 better ways to solve some of these problems.
6691 Although it is not incorrect, we don't accept QImode and HImode
6692 addresses based on the frame pointer or arg pointer until the
6693 reload pass starts. This is so that eliminating such addresses
6694 into stack based ones won't produce impossible code. */
6696 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6698 /* ??? Not clear if this is right. Experiment. */
6699 if (GET_MODE_SIZE (mode) < 4
6700 && !(reload_in_progress || reload_completed)
6701 && (reg_mentioned_p (frame_pointer_rtx, x)
6702 || reg_mentioned_p (arg_pointer_rtx, x)
6703 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6704 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6705 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6706 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6707 return 0;
6709 /* Accept any base register. SP only in SImode or larger. */
6710 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6711 return 1;
6713 /* This is PC relative data before arm_reorg runs. */
6714 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6715 && GET_CODE (x) == SYMBOL_REF
6716 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6717 return 1;
6719 /* This is PC relative data after arm_reorg runs. */
6720 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6721 && reload_completed
6722 && (GET_CODE (x) == LABEL_REF
6723 || (GET_CODE (x) == CONST
6724 && GET_CODE (XEXP (x, 0)) == PLUS
6725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6726 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6727 return 1;
6729 /* Post-inc indexing only supported for SImode and larger. */
6730 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6731 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6732 return 1;
6734 else if (GET_CODE (x) == PLUS)
6736 /* REG+REG address can be any two index registers. */
6737 /* We disallow FRAME+REG addressing since we know that FRAME
6738 will be replaced with STACK, and SP relative addressing only
6739 permits SP+OFFSET. */
6740 if (GET_MODE_SIZE (mode) <= 4
6741 && XEXP (x, 0) != frame_pointer_rtx
6742 && XEXP (x, 1) != frame_pointer_rtx
6743 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6744 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6745 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6746 return 1;
6748 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
6749 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6750 || XEXP (x, 0) == arg_pointer_rtx)
6751 && CONST_INT_P (XEXP (x, 1))
6752 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6753 return 1;
6755 /* REG+const has a 10-bit offset for SP, but only SImode and
6756 larger are supported. */
6757 /* ??? Should probably check for DI/DFmode overflow here
6758 just like GO_IF_LEGITIMATE_OFFSET does. */
6759 else if (REG_P (XEXP (x, 0))
6760 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6761 && GET_MODE_SIZE (mode) >= 4
6762 && CONST_INT_P (XEXP (x, 1))
6763 && INTVAL (XEXP (x, 1)) >= 0
6764 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6765 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6766 return 1;
6768 else if (REG_P (XEXP (x, 0))
6769 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6770 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6771 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6772 && REGNO (XEXP (x, 0))
6773 <= LAST_VIRTUAL_POINTER_REGISTER))
6774 && GET_MODE_SIZE (mode) >= 4
6775 && CONST_INT_P (XEXP (x, 1))
6776 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6777 return 1;
6780 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6781 && GET_MODE_SIZE (mode) == 4
6782 && GET_CODE (x) == SYMBOL_REF
6783 && CONSTANT_POOL_ADDRESS_P (x)
6784 && ! (flag_pic
6785 && symbol_mentioned_p (get_pool_constant (x))
6786 && ! pcrel_constant_p (get_pool_constant (x))))
6787 return 1;
6789 return 0;
6792 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6793 instruction of mode MODE. */
6795 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6797 switch (GET_MODE_SIZE (mode))
6799 case 1:
6800 return val >= 0 && val < 32;
6802 case 2:
6803 return val >= 0 && val < 64 && (val & 1) == 0;
6805 default:
6806 return (val >= 0
6807 && (val + GET_MODE_SIZE (mode)) <= 128
6808 && (val & 3) == 0);
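/* Editor's summary of the checks above (worked out, not normative):
   accepted Thumb-1 reg+immediate offsets are assumed to be
     size 1 (byte):      0 .. 31
     size 2 (halfword):  0 .. 62, even
     size 4 and larger:  0 .. 128 - size, multiple of 4.  */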
6812 bool
6813 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6815 if (TARGET_ARM)
6816 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6817 else if (TARGET_THUMB2)
6818 return thumb2_legitimate_address_p (mode, x, strict_p);
6819 else /* if (TARGET_THUMB1) */
6820 return thumb1_legitimate_address_p (mode, x, strict_p);
6823 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6825 Given an rtx X being reloaded into a reg required to be
6826 in class CLASS, return the class of reg to actually use.
6827 In general this is just CLASS, but for the Thumb core registers and
6828 immediate constants we prefer a LO_REGS class or a subset. */
6830 static reg_class_t
6831 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6833 if (TARGET_32BIT)
6834 return rclass;
6835 else
6837 if (rclass == GENERAL_REGS
6838 || rclass == HI_REGS
6839 || rclass == NO_REGS
6840 || rclass == STACK_REG)
6841 return LO_REGS;
6842 else
6843 return rclass;
6847 /* Build the SYMBOL_REF for __tls_get_addr. */
6849 static GTY(()) rtx tls_get_addr_libfunc;
6851 static rtx
6852 get_tls_get_addr (void)
6854 if (!tls_get_addr_libfunc)
6855 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6856 return tls_get_addr_libfunc;
6860 arm_load_tp (rtx target)
6862 if (!target)
6863 target = gen_reg_rtx (SImode);
6865 if (TARGET_HARD_TP)
6867 /* Can return in any reg. */
6868 emit_insn (gen_load_tp_hard (target));
6870 else
6872 /* Always returned in r0. Immediately copy the result into a pseudo,
6873 otherwise other uses of r0 (e.g. setting up function arguments) may
6874 clobber the value. */
6876 rtx tmp;
6878 emit_insn (gen_load_tp_soft ());
6880 tmp = gen_rtx_REG (SImode, 0);
6881 emit_move_insn (target, tmp);
6883 return target;
6886 static rtx
6887 load_tls_operand (rtx x, rtx reg)
6889 rtx tmp;
6891 if (reg == NULL_RTX)
6892 reg = gen_reg_rtx (SImode);
6894 tmp = gen_rtx_CONST (SImode, x);
6896 emit_move_insn (reg, tmp);
6898 return reg;
6901 static rtx
6902 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6904 rtx insns, label, labelno, sum;
6906 gcc_assert (reloc != TLS_DESCSEQ);
6907 start_sequence ();
6909 labelno = GEN_INT (pic_labelno++);
6910 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6911 label = gen_rtx_CONST (VOIDmode, label);
6913 sum = gen_rtx_UNSPEC (Pmode,
6914 gen_rtvec (4, x, GEN_INT (reloc), label,
6915 GEN_INT (TARGET_ARM ? 8 : 4)),
6916 UNSPEC_TLS);
6917 reg = load_tls_operand (sum, reg);
6919 if (TARGET_ARM)
6920 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6921 else
6922 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6924 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6925 LCT_PURE, /* LCT_CONST? */
6926 Pmode, 1, reg, Pmode);
6928 insns = get_insns ();
6929 end_sequence ();
6931 return insns;
6934 static rtx
6935 arm_tls_descseq_addr (rtx x, rtx reg)
6937 rtx labelno = GEN_INT (pic_labelno++);
6938 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6939 rtx sum = gen_rtx_UNSPEC (Pmode,
6940 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6941 gen_rtx_CONST (VOIDmode, label),
6942 GEN_INT (!TARGET_ARM)),
6943 UNSPEC_TLS);
6944 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6946 emit_insn (gen_tlscall (x, labelno));
6947 if (!reg)
6948 reg = gen_reg_rtx (SImode);
6949 else
6950 gcc_assert (REGNO (reg) != 0);
6952 emit_move_insn (reg, reg0);
6954 return reg;
6958 legitimize_tls_address (rtx x, rtx reg)
6960 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6961 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6963 switch (model)
6965 case TLS_MODEL_GLOBAL_DYNAMIC:
6966 if (TARGET_GNU2_TLS)
6968 reg = arm_tls_descseq_addr (x, reg);
6970 tp = arm_load_tp (NULL_RTX);
6972 dest = gen_rtx_PLUS (Pmode, tp, reg);
6974 else
6976 /* Original scheme */
6977 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6978 dest = gen_reg_rtx (Pmode);
6979 emit_libcall_block (insns, dest, ret, x);
6981 return dest;
6983 case TLS_MODEL_LOCAL_DYNAMIC:
6984 if (TARGET_GNU2_TLS)
6986 reg = arm_tls_descseq_addr (x, reg);
6988 tp = arm_load_tp (NULL_RTX);
6990 dest = gen_rtx_PLUS (Pmode, tp, reg);
6992 else
6994 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6996 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6997 share the LDM result with other LD model accesses. */
6998 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6999 UNSPEC_TLS);
7000 dest = gen_reg_rtx (Pmode);
7001 emit_libcall_block (insns, dest, ret, eqv);
7003 /* Load the addend. */
7004 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7005 GEN_INT (TLS_LDO32)),
7006 UNSPEC_TLS);
7007 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7008 dest = gen_rtx_PLUS (Pmode, dest, addend);
7010 return dest;
7012 case TLS_MODEL_INITIAL_EXEC:
7013 labelno = GEN_INT (pic_labelno++);
7014 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7015 label = gen_rtx_CONST (VOIDmode, label);
7016 sum = gen_rtx_UNSPEC (Pmode,
7017 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7018 GEN_INT (TARGET_ARM ? 8 : 4)),
7019 UNSPEC_TLS);
7020 reg = load_tls_operand (sum, reg);
7022 if (TARGET_ARM)
7023 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7024 else if (TARGET_THUMB2)
7025 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7026 else
7028 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7029 emit_move_insn (reg, gen_const_mem (SImode, reg));
7032 tp = arm_load_tp (NULL_RTX);
7034 return gen_rtx_PLUS (Pmode, tp, reg);
7036 case TLS_MODEL_LOCAL_EXEC:
7037 tp = arm_load_tp (NULL_RTX);
7039 reg = gen_rtx_UNSPEC (Pmode,
7040 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7041 UNSPEC_TLS);
7042 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7044 return gen_rtx_PLUS (Pmode, tp, reg);
7046 default:
7047 abort ();
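/* Editor's illustration (sketch): the TLS models dispatched on above
   correspond to accesses such as the hypothetical ones below; which
   model is chosen is assumed to follow the usual -fpic, visibility and
   linker rules.  */
extern __thread int imported_tls_counter;  /* typically global-dynamic or initial-exec */
static __thread int private_tls_counter;   /* typically local-dynamic or local-exec */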
7051 /* Try machine-dependent ways of modifying an illegitimate address
7052 to be legitimate. If we find one, return the new, valid address. */
7054 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7056 if (!TARGET_ARM)
7058 /* TODO: legitimize_address for Thumb2. */
7059 if (TARGET_THUMB2)
7060 return x;
7061 return thumb_legitimize_address (x, orig_x, mode);
7064 if (arm_tls_symbol_p (x))
7065 return legitimize_tls_address (x, NULL_RTX);
7067 if (GET_CODE (x) == PLUS)
7069 rtx xop0 = XEXP (x, 0);
7070 rtx xop1 = XEXP (x, 1);
7072 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7073 xop0 = force_reg (SImode, xop0);
7075 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
7076 xop1 = force_reg (SImode, xop1);
7078 if (ARM_BASE_REGISTER_RTX_P (xop0)
7079 && CONST_INT_P (xop1))
7081 HOST_WIDE_INT n, low_n;
7082 rtx base_reg, val;
7083 n = INTVAL (xop1);
7085 /* VFP addressing modes actually allow greater offsets, but for
7086 now we just stick with the lowest common denominator. */
7087 if (mode == DImode
7088 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7090 low_n = n & 0x0f;
7091 n &= ~0x0f;
7092 if (low_n > 4)
7094 n += 16;
7095 low_n -= 16;
7098 else
7100 low_n = ((mode) == TImode ? 0
7101 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7102 n -= low_n;
7105 base_reg = gen_reg_rtx (SImode);
7106 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7107 emit_move_insn (base_reg, val);
7108 x = plus_constant (Pmode, base_reg, low_n);
7110 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7111 x = gen_rtx_PLUS (SImode, xop0, xop1);
7114 /* XXX We don't allow MINUS any more -- see comment in
7115 arm_legitimate_address_outer_p (). */
7116 else if (GET_CODE (x) == MINUS)
7118 rtx xop0 = XEXP (x, 0);
7119 rtx xop1 = XEXP (x, 1);
7121 if (CONSTANT_P (xop0))
7122 xop0 = force_reg (SImode, xop0);
7124 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7125 xop1 = force_reg (SImode, xop1);
7127 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7128 x = gen_rtx_MINUS (SImode, xop0, xop1);
7131 /* Make sure to take full advantage of the pre-indexed addressing mode
7132 with absolute addresses, which often allows the base register to be
7133 shared between multiple adjacent memory references, and might even
7134 allow the minipool to be avoided entirely. */
7135 else if (CONST_INT_P (x) && optimize > 0)
7137 unsigned int bits;
7138 HOST_WIDE_INT mask, base, index;
7139 rtx base_reg;
7141 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7142 use an 8-bit index. So let's use a 12-bit index for SImode only and
7143 hope that arm_gen_constant will enable ldrb to use more bits. */
7144 bits = (mode == SImode) ? 12 : 8;
7145 mask = (1 << bits) - 1;
7146 base = INTVAL (x) & ~mask;
7147 index = INTVAL (x) & mask;
7148 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7150 /* It'll most probably be more efficient to generate the base
7151 with more bits set and use a negative index instead. */
7152 base |= mask;
7153 index -= mask;
7155 base_reg = force_reg (SImode, GEN_INT (base));
7156 x = plus_constant (Pmode, base_reg, index);
7159 if (flag_pic)
7161 /* We need to find and carefully transform any SYMBOL and LABEL
7162 references; so go back to the original address expression. */
7163 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7165 if (new_x != orig_x)
7166 x = new_x;
7169 return x;
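/* Editor's worked example for the absolute-address split above (a
   sketch, assuming SImode and optimize > 0): for x = 0x0000a004,
   bits = 12 gives mask = 0xfff, so base = 0xa000 and index = 0x4;
   bit_count (0xa000) == 2 is not more than (32 - 12)/2, so the base is
   kept, loaded into a pseudo, and the access becomes [base_reg, #4],
   letting neighbouring absolute addresses share the same base.  */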
7173 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7174 to be legitimate. If we find one, return the new, valid address. */
7176 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7178 if (arm_tls_symbol_p (x))
7179 return legitimize_tls_address (x, NULL_RTX);
7181 if (GET_CODE (x) == PLUS
7182 && CONST_INT_P (XEXP (x, 1))
7183 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7184 || INTVAL (XEXP (x, 1)) < 0))
7186 rtx xop0 = XEXP (x, 0);
7187 rtx xop1 = XEXP (x, 1);
7188 HOST_WIDE_INT offset = INTVAL (xop1);
7190 /* Try and fold the offset into a biasing of the base register and
7191 then offsetting that. Don't do this when optimizing for space
7192 since it can cause too many CSEs. */
7193 if (optimize_size && offset >= 0
7194 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7196 HOST_WIDE_INT delta;
7198 if (offset >= 256)
7199 delta = offset - (256 - GET_MODE_SIZE (mode));
7200 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7201 delta = 31 * GET_MODE_SIZE (mode);
7202 else
7203 delta = offset & (~31 * GET_MODE_SIZE (mode));
7205 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7206 NULL_RTX);
7207 x = plus_constant (Pmode, xop0, delta);
7209 else if (offset < 0 && offset > -256)
7210 /* Small negative offsets are best done with a subtract before the
7211 dereference; forcing these into a register normally takes two
7212 instructions. */
7213 x = force_operand (x, NULL_RTX);
7214 else
7216 /* For the remaining cases, force the constant into a register. */
7217 xop1 = force_reg (SImode, xop1);
7218 x = gen_rtx_PLUS (SImode, xop0, xop1);
7221 else if (GET_CODE (x) == PLUS
7222 && s_register_operand (XEXP (x, 1), SImode)
7223 && !s_register_operand (XEXP (x, 0), SImode))
7225 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7227 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7230 if (flag_pic)
7232 /* We need to find and carefully transform any SYMBOL and LABEL
7233 references; so go back to the original address expression. */
7234 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7236 if (new_x != orig_x)
7237 x = new_x;
7240 return x;
7243 bool
7244 arm_legitimize_reload_address (rtx *p,
7245 enum machine_mode mode,
7246 int opnum, int type,
7247 int ind_levels ATTRIBUTE_UNUSED)
7249 /* We must recognize output that we have already generated ourselves. */
7250 if (GET_CODE (*p) == PLUS
7251 && GET_CODE (XEXP (*p, 0)) == PLUS
7252 && REG_P (XEXP (XEXP (*p, 0), 0))
7253 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7254 && CONST_INT_P (XEXP (*p, 1)))
7256 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7257 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7258 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7259 return true;
7262 if (GET_CODE (*p) == PLUS
7263 && REG_P (XEXP (*p, 0))
7264 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7265 /* If the base register is equivalent to a constant, let the generic
7266 code handle it. Otherwise we will run into problems if a future
7267 reload pass decides to rematerialize the constant. */
7268 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7269 && CONST_INT_P (XEXP (*p, 1)))
7271 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7272 HOST_WIDE_INT low, high;
7274 /* Detect coprocessor load/stores. */
7275 bool coproc_p = ((TARGET_HARD_FLOAT
7276 && TARGET_VFP
7277 && (mode == SFmode || mode == DFmode))
7278 || (TARGET_REALLY_IWMMXT
7279 && VALID_IWMMXT_REG_MODE (mode))
7280 || (TARGET_NEON
7281 && (VALID_NEON_DREG_MODE (mode)
7282 || VALID_NEON_QREG_MODE (mode))));
7284 /* For some conditions, bail out when lower two bits are unaligned. */
7285 if ((val & 0x3) != 0
7286 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7287 && (coproc_p
7288 /* For DI, and DF under soft-float: */
7289 || ((mode == DImode || mode == DFmode)
7290 /* Without ldrd, we use stm/ldm, which does not
7291 fare well with unaligned bits. */
7292 && (! TARGET_LDRD
7293 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7294 || TARGET_THUMB2))))
7295 return false;
7297 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7298 where the (reg+high) part gets turned into a reload add insn,
7299 we try to decompose the index into high/low values that can often
7300 also lead to better reload CSE.
7301 For example:
7302 ldr r0, [r2, #4100] // Offset too large
7303 ldr r1, [r2, #4104] // Offset too large
7305 is best reloaded as:
7306 add t1, r2, #4096
7307 ldr r0, [t1, #4]
7308 add t2, r2, #4096
7309 ldr r1, [t2, #8]
7311 which post-reload CSE can simplify in most cases to eliminate the
7312 second add instruction:
7313 add t1, r2, #4096
7314 ldr r0, [t1, #4]
7315 ldr r1, [t1, #8]
7317 The idea here is that we want to split out the bits of the constant
7318 as a mask, rather than as subtracting the maximum offset that the
7319 respective type of load/store used can handle.
7321 A negative low part can still be useful even when the overall
7322 offset is positive; sometimes this may lead to an immediate
7323 that can be constructed with fewer instructions.
7324 For example:
7325 ldr r0, [r2, #0x3FFFFC]
7327 This is best reloaded as:
7328 add t1, r2, #0x400000
7329 ldr r0, [t1, #-4]
7331 The trick for spotting this for a load insn with N bits of offset
7332 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7333 negative offset that is going to make bit N and all the bits below
7334 it become zero in the remainder part.
7336 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7337 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7338 used in most cases of ARM load/store instructions. */
7340 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7341 (((VAL) & ((1 << (N)) - 1)) \
7342 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7343 : 0)
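/* Editor's worked example for the macro above (sketch): with
   VAL = 0x3ffffc and N = 12 (the ldr/str word-offset width),
     VAL & 0xfff       = 0xffc    (nonzero, so take the first arm)
     VAL & 0x1fff      = 0x1ffc
     0x1ffc ^ 0x1000   = 0x0ffc
     0x0ffc - 0x1000   = -4
   so low = -4 and high = VAL - low = 0x400000, matching the reload
   sequence sketched in the comment above.  */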
7345 if (coproc_p)
7347 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7349 /* NEON quad-word load/stores are made of two double-word accesses,
7350 so the valid index range is reduced by 8. Treat as 9-bit range if
7351 we go over it. */
7352 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7353 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7355 else if (GET_MODE_SIZE (mode) == 8)
7357 if (TARGET_LDRD)
7358 low = (TARGET_THUMB2
7359 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7360 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7361 else
7362 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7363 to access doublewords. The supported load/store offsets are
7364 -8, -4, and 4, which we try to produce here. */
7365 low = ((val & 0xf) ^ 0x8) - 0x8;
7367 else if (GET_MODE_SIZE (mode) < 8)
7369 /* NEON element load/stores do not have an offset. */
7370 if (TARGET_NEON_FP16 && mode == HFmode)
7371 return false;
7373 if (TARGET_THUMB2)
7375 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7376 Try the wider 12-bit range first, and re-try if the result
7377 is out of range. */
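	  /* Illustrative example (editorial, not in the original source):
	     val == 4351 gives low == -3841 on the 12-bit pass, which is
	     below -255, so the 8-bit retry yields low == 255 and
	     high == 4096; both are encodable in Thumb-2.  */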
7378 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7379 if (low < -255)
7380 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7382 else
7384 if (mode == HImode || mode == HFmode)
7386 if (arm_arch4)
7387 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7388 else
7390 /* The storehi/movhi_bytes fallbacks can use only
7391 [-4094,+4094] of the full ldrb/strb index range. */
7392 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7393 if (low == 4095 || low == -4095)
7394 return false;
7397 else
7398 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7401 else
7402 return false;
7404 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7405 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7406 - (unsigned HOST_WIDE_INT) 0x80000000);
7407 /* Check for overflow or zero */
7408 if (low == 0 || high == 0 || (high + low != val))
7409 return false;
7411 /* Reload the high part into a base reg; leave the low part
7412 in the mem.
7413 Note that replacing this gen_rtx_PLUS with plus_constant is
7414 wrong in this case because we rely on the
7415 (plus (plus reg c1) c2) structure being preserved so that
7416 XEXP (*p, 0) in push_reload below uses the correct term. */
7417 *p = gen_rtx_PLUS (GET_MODE (*p),
7418 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7419 GEN_INT (high)),
7420 GEN_INT (low));
7421 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7422 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7423 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7424 return true;
7427 return false;
7431 thumb_legitimize_reload_address (rtx *x_p,
7432 enum machine_mode mode,
7433 int opnum, int type,
7434 int ind_levels ATTRIBUTE_UNUSED)
7436 rtx x = *x_p;
7438 if (GET_CODE (x) == PLUS
7439 && GET_MODE_SIZE (mode) < 4
7440 && REG_P (XEXP (x, 0))
7441 && XEXP (x, 0) == stack_pointer_rtx
7442 && CONST_INT_P (XEXP (x, 1))
7443 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7445 rtx orig_x = x;
7447 x = copy_rtx (x);
7448 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7449 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7450 return x;
7453 /* If both registers are hi-regs, then it's better to reload the
7454 entire expression rather than each register individually. That
7455 only requires one reload register rather than two. */
7456 if (GET_CODE (x) == PLUS
7457 && REG_P (XEXP (x, 0))
7458 && REG_P (XEXP (x, 1))
7459 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7460 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7462 rtx orig_x = x;
7464 x = copy_rtx (x);
7465 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7466 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7467 return x;
7470 return NULL;
7473 /* Test for various thread-local symbols. */
7475 /* Return TRUE if X is a thread-local symbol. */
7477 static bool
7478 arm_tls_symbol_p (rtx x)
7480 if (! TARGET_HAVE_TLS)
7481 return false;
7483 if (GET_CODE (x) != SYMBOL_REF)
7484 return false;
7486 return SYMBOL_REF_TLS_MODEL (x) != 0;
7489 /* Helper for arm_tls_referenced_p. */
7491 static int
7492 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7494 if (GET_CODE (*x) == SYMBOL_REF)
7495 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7497 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7498 TLS offsets, not real symbol references. */
7499 if (GET_CODE (*x) == UNSPEC
7500 && XINT (*x, 1) == UNSPEC_TLS)
7501 return -1;
7503 return 0;
7506 /* Return TRUE if X contains any TLS symbol references. */
7508 bool
7509 arm_tls_referenced_p (rtx x)
7511 if (! TARGET_HAVE_TLS)
7512 return false;
7514 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7517 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7519 On the ARM, allow any integer (invalid ones are removed later by insn
7520 patterns), nice doubles and symbol_refs which refer to the function's
7521 constant pool XXX.
7523 When generating pic allow anything. */
7525 static bool
7526 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7528 /* At present, we have no support for Neon structure constants, so forbid
7529 them here. It might be possible to handle simple cases like 0 and -1
7530 in future. */
7531 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7532 return false;
7534 return flag_pic || !label_mentioned_p (x);
7537 static bool
7538 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7540 return (CONST_INT_P (x)
7541 || CONST_DOUBLE_P (x)
7542 || CONSTANT_ADDRESS_P (x)
7543 || flag_pic);
7546 static bool
7547 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7549 return (!arm_cannot_force_const_mem (mode, x)
7550 && (TARGET_32BIT
7551 ? arm_legitimate_constant_p_1 (mode, x)
7552 : thumb_legitimate_constant_p (mode, x)));
7555 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7557 static bool
7558 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7560 rtx base, offset;
7562 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7564 split_const (x, &base, &offset);
7565 if (GET_CODE (base) == SYMBOL_REF
7566 && !offset_within_block_p (base, INTVAL (offset)))
7567 return true;
7569 return arm_tls_referenced_p (x);
7572 #define REG_OR_SUBREG_REG(X) \
7573 (REG_P (X) \
7574 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7576 #define REG_OR_SUBREG_RTX(X) \
7577 (REG_P (X) ? (X) : SUBREG_REG (X))
7579 static inline int
7580 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7582 enum machine_mode mode = GET_MODE (x);
7583 int total, words;
7585 switch (code)
7587 case ASHIFT:
7588 case ASHIFTRT:
7589 case LSHIFTRT:
7590 case ROTATERT:
7591 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7593 case PLUS:
7594 case MINUS:
7595 case COMPARE:
7596 case NEG:
7597 case NOT:
7598 return COSTS_N_INSNS (1);
7600 case MULT:
7601 if (CONST_INT_P (XEXP (x, 1)))
7603 int cycles = 0;
7604 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7606 while (i)
7608 i >>= 2;
7609 cycles++;
7611 return COSTS_N_INSNS (2) + cycles;
7613 return COSTS_N_INSNS (1) + 16;
7615 case SET:
7616 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7617 the mode. */
7618 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7619 return (COSTS_N_INSNS (words)
7620 + 4 * ((MEM_P (SET_SRC (x)))
7621 + MEM_P (SET_DEST (x))));
7623 case CONST_INT:
7624 if (outer == SET)
7626 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7627 return 0;
7628 if (thumb_shiftable_const (INTVAL (x)))
7629 return COSTS_N_INSNS (2);
7630 return COSTS_N_INSNS (3);
7632 else if ((outer == PLUS || outer == COMPARE)
7633 && INTVAL (x) < 256 && INTVAL (x) > -256)
7634 return 0;
7635 else if ((outer == IOR || outer == XOR || outer == AND)
7636 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7637 return COSTS_N_INSNS (1);
7638 else if (outer == AND)
7640 int i;
7641 /* This duplicates the tests in the andsi3 expander. */
7642 for (i = 9; i <= 31; i++)
7643 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7644 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7645 return COSTS_N_INSNS (2);
7647 else if (outer == ASHIFT || outer == ASHIFTRT
7648 || outer == LSHIFTRT)
7649 return 0;
7650 return COSTS_N_INSNS (2);
7652 case CONST:
7653 case CONST_DOUBLE:
7654 case LABEL_REF:
7655 case SYMBOL_REF:
7656 return COSTS_N_INSNS (3);
7658 case UDIV:
7659 case UMOD:
7660 case DIV:
7661 case MOD:
7662 return 100;
7664 case TRUNCATE:
7665 return 99;
7667 case AND:
7668 case XOR:
7669 case IOR:
7670 /* XXX guess. */
7671 return 8;
7673 case MEM:
7674 /* XXX another guess. */
7675 /* Memory costs quite a lot for the first word, but subsequent words
7676 load at the equivalent of a single insn each. */
7677 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7678 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7679 ? 4 : 0));
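      /* Editorial worked example (assuming UNITS_PER_WORD == 4, as on ARM):
	 an SImode load costs 10, a DImode load costs 10 + 4 == 14, and a
	 constant-pool SYMBOL_REF address adds another 4.  */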
7681 case IF_THEN_ELSE:
7682 /* XXX a guess. */
7683 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7684 return 14;
7685 return 2;
7687 case SIGN_EXTEND:
7688 case ZERO_EXTEND:
7689 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7690 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7692 if (mode == SImode)
7693 return total;
7695 if (arm_arch6)
7696 return total + COSTS_N_INSNS (1);
7698 /* Assume a two-shift sequence. Increase the cost slightly so
7699 we prefer actual shifts over an extend operation. */
7700 return total + 1 + COSTS_N_INSNS (2);
7702 default:
7703 return 99;
7707 static inline bool
7708 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7710 enum machine_mode mode = GET_MODE (x);
7711 enum rtx_code subcode;
7712 rtx operand;
7713 enum rtx_code code = GET_CODE (x);
7714 *total = 0;
7716 switch (code)
7718 case MEM:
7719 /* Memory costs quite a lot for the first word, but subsequent words
7720 load at the equivalent of a single insn each. */
7721 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7722 return true;
7724 case DIV:
7725 case MOD:
7726 case UDIV:
7727 case UMOD:
7728 if (TARGET_HARD_FLOAT && mode == SFmode)
7729 *total = COSTS_N_INSNS (2);
7730 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7731 *total = COSTS_N_INSNS (4);
7732 else
7733 *total = COSTS_N_INSNS (20);
7734 return false;
7736 case ROTATE:
7737 if (REG_P (XEXP (x, 1)))
7738 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7739 else if (!CONST_INT_P (XEXP (x, 1)))
7740 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7742 /* Fall through */
7743 case ROTATERT:
7744 if (mode != SImode)
7746 *total += COSTS_N_INSNS (4);
7747 return true;
7750 /* Fall through */
7751 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7752 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7753 if (mode == DImode)
7755 *total += COSTS_N_INSNS (3);
7756 return true;
7759 *total += COSTS_N_INSNS (1);
7760 /* Increase the cost of complex shifts because they aren't any faster,
7761 	 and they reduce dual-issue opportunities.  */
7762 if (arm_tune_cortex_a9
7763 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7764 ++*total;
7766 return true;
7768 case MINUS:
7769 if (mode == DImode)
7771 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7772 if (CONST_INT_P (XEXP (x, 0))
7773 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7775 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7776 return true;
7779 if (CONST_INT_P (XEXP (x, 1))
7780 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7782 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7783 return true;
7786 return false;
7789 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7791 if (TARGET_HARD_FLOAT
7792 && (mode == SFmode
7793 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7795 *total = COSTS_N_INSNS (1);
7796 if (CONST_DOUBLE_P (XEXP (x, 0))
7797 && arm_const_double_rtx (XEXP (x, 0)))
7799 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7800 return true;
7803 if (CONST_DOUBLE_P (XEXP (x, 1))
7804 && arm_const_double_rtx (XEXP (x, 1)))
7806 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7807 return true;
7810 return false;
7812 *total = COSTS_N_INSNS (20);
7813 return false;
7816 *total = COSTS_N_INSNS (1);
7817 if (CONST_INT_P (XEXP (x, 0))
7818 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7820 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7821 return true;
7824 subcode = GET_CODE (XEXP (x, 1));
7825 if (subcode == ASHIFT || subcode == ASHIFTRT
7826 || subcode == LSHIFTRT
7827 || subcode == ROTATE || subcode == ROTATERT)
7829 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7830 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7831 return true;
7834 /* A shift as a part of RSB costs no more than RSB itself. */
7835 if (GET_CODE (XEXP (x, 0)) == MULT
7836 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7838 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7839 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7840 return true;
7843 if (subcode == MULT
7844 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7846 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7847 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7848 return true;
7851 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7852 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7854 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7855 if (REG_P (XEXP (XEXP (x, 1), 0))
7856 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7857 *total += COSTS_N_INSNS (1);
7859 return true;
7862 /* Fall through */
7864 case PLUS:
7865 if (code == PLUS && arm_arch6 && mode == SImode
7866 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7867 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7869 *total = COSTS_N_INSNS (1);
7870 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7871 0, speed);
7872 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7873 return true;
7876 /* MLA: All arguments must be registers. We filter out
7877 	 multiplication by a power of two, so that we fall through to
7878 the code below. */
7879 if (GET_CODE (XEXP (x, 0)) == MULT
7880 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7882 /* The cost comes from the cost of the multiply. */
7883 return false;
7886 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7888 if (TARGET_HARD_FLOAT
7889 && (mode == SFmode
7890 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7892 *total = COSTS_N_INSNS (1);
7893 if (CONST_DOUBLE_P (XEXP (x, 1))
7894 && arm_const_double_rtx (XEXP (x, 1)))
7896 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7897 return true;
7900 return false;
7903 *total = COSTS_N_INSNS (20);
7904 return false;
7907 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7908 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7910 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7911 if (REG_P (XEXP (XEXP (x, 0), 0))
7912 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7913 *total += COSTS_N_INSNS (1);
7914 return true;
7917 /* Fall through */
7919 case AND: case XOR: case IOR:
7921 	/* Normally the frame registers will be split into reg+const during
7922 reload, so it is a bad idea to combine them with other instructions,
7923 since then they might not be moved outside of loops. As a compromise
7924 we allow integration with ops that have a constant as their second
7925 operand. */
7926 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7927 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7928 && !CONST_INT_P (XEXP (x, 1)))
7929 *total = COSTS_N_INSNS (1);
7931 if (mode == DImode)
7933 *total += COSTS_N_INSNS (2);
7934 if (CONST_INT_P (XEXP (x, 1))
7935 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7937 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7938 return true;
7941 return false;
7944 *total += COSTS_N_INSNS (1);
7945 if (CONST_INT_P (XEXP (x, 1))
7946 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7948 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7949 return true;
7951 subcode = GET_CODE (XEXP (x, 0));
7952 if (subcode == ASHIFT || subcode == ASHIFTRT
7953 || subcode == LSHIFTRT
7954 || subcode == ROTATE || subcode == ROTATERT)
7956 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7957 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7958 return true;
7961 if (subcode == MULT
7962 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7964 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7965 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7966 return true;
7969 if (subcode == UMIN || subcode == UMAX
7970 || subcode == SMIN || subcode == SMAX)
7972 *total = COSTS_N_INSNS (3);
7973 return true;
7976 return false;
7978 case MULT:
7979 /* This should have been handled by the CPU specific routines. */
7980 gcc_unreachable ();
7982 case TRUNCATE:
7983 if (arm_arch3m && mode == SImode
7984 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7985 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7986 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7987 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7988 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7989 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7991 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7992 return true;
7994 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7995 return false;
7997 case NEG:
7998 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8000 if (TARGET_HARD_FLOAT
8001 && (mode == SFmode
8002 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8004 *total = COSTS_N_INSNS (1);
8005 return false;
8007 *total = COSTS_N_INSNS (2);
8008 return false;
8011 /* Fall through */
8012 case NOT:
8013 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8014 if (mode == SImode && code == NOT)
8016 subcode = GET_CODE (XEXP (x, 0));
8017 if (subcode == ASHIFT || subcode == ASHIFTRT
8018 || subcode == LSHIFTRT
8019 || subcode == ROTATE || subcode == ROTATERT
8020 || (subcode == MULT
8021 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8023 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8024 /* Register shifts cost an extra cycle. */
8025 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8026 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8027 subcode, 1, speed);
8028 return true;
8032 return false;
8034 case IF_THEN_ELSE:
8035 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8037 *total = COSTS_N_INSNS (4);
8038 return true;
8041 operand = XEXP (x, 0);
8043 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8044 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8045 && REG_P (XEXP (operand, 0))
8046 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8047 *total += COSTS_N_INSNS (1);
8048 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8049 + rtx_cost (XEXP (x, 2), code, 2, speed));
8050 return true;
8052 case NE:
8053 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8055 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8056 return true;
8058 goto scc_insn;
8060 case GE:
8061 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8062 && mode == SImode && XEXP (x, 1) == const0_rtx)
8064 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8065 return true;
8067 goto scc_insn;
8069 case LT:
8070 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8071 && mode == SImode && XEXP (x, 1) == const0_rtx)
8073 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8074 return true;
8076 goto scc_insn;
8078 case EQ:
8079 case GT:
8080 case LE:
8081 case GEU:
8082 case LTU:
8083 case GTU:
8084 case LEU:
8085 case UNORDERED:
8086 case ORDERED:
8087 case UNEQ:
8088 case UNGE:
8089 case UNLT:
8090 case UNGT:
8091 case UNLE:
8092 scc_insn:
8093     /* SCC insns.  If the comparison has already been performed, they
8094        cost 2 instructions.  Otherwise they need an additional comparison
8095        before them.  */
8096 *total = COSTS_N_INSNS (2);
8097 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8099 return true;
8102 /* Fall through */
8103 case COMPARE:
8104 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8106 *total = 0;
8107 return true;
8110 *total += COSTS_N_INSNS (1);
8111 if (CONST_INT_P (XEXP (x, 1))
8112 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8114 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8115 return true;
8118 subcode = GET_CODE (XEXP (x, 0));
8119 if (subcode == ASHIFT || subcode == ASHIFTRT
8120 || subcode == LSHIFTRT
8121 || subcode == ROTATE || subcode == ROTATERT)
8123 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8124 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8125 return true;
8128 if (subcode == MULT
8129 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8131 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8132 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8133 return true;
8136 return false;
8138 case UMIN:
8139 case UMAX:
8140 case SMIN:
8141 case SMAX:
8142 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8143 if (!CONST_INT_P (XEXP (x, 1))
8144 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8145 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8146 return true;
8148 case ABS:
8149 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8151 if (TARGET_HARD_FLOAT
8152 && (mode == SFmode
8153 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8155 *total = COSTS_N_INSNS (1);
8156 return false;
8158 *total = COSTS_N_INSNS (20);
8159 return false;
8161 *total = COSTS_N_INSNS (1);
8162 if (mode == DImode)
8163 *total += COSTS_N_INSNS (3);
8164 return false;
8166 case SIGN_EXTEND:
8167 case ZERO_EXTEND:
8168 *total = 0;
8169 if (GET_MODE_CLASS (mode) == MODE_INT)
8171 rtx op = XEXP (x, 0);
8172 enum machine_mode opmode = GET_MODE (op);
8174 if (mode == DImode)
8175 *total += COSTS_N_INSNS (1);
8177 if (opmode != SImode)
8179 if (MEM_P (op))
8181 /* If !arm_arch4, we use one of the extendhisi2_mem
8182 or movhi_bytes patterns for HImode. For a QImode
8183 sign extension, we first zero-extend from memory
8184 and then perform a shift sequence. */
8185 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8186 *total += COSTS_N_INSNS (2);
8188 else if (arm_arch6)
8189 *total += COSTS_N_INSNS (1);
8191 /* We don't have the necessary insn, so we need to perform some
8192 other operation. */
8193 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8194 /* An and with constant 255. */
8195 *total += COSTS_N_INSNS (1);
8196 else
8197 /* A shift sequence. Increase costs slightly to avoid
8198 combining two shifts into an extend operation. */
8199 *total += COSTS_N_INSNS (2) + 1;
8202 return false;
8205 switch (GET_MODE (XEXP (x, 0)))
8207 case V8QImode:
8208 case V4HImode:
8209 case V2SImode:
8210 case V4QImode:
8211 case V2HImode:
8212 *total = COSTS_N_INSNS (1);
8213 return false;
8215 default:
8216 gcc_unreachable ();
8218 gcc_unreachable ();
8220 case ZERO_EXTRACT:
8221 case SIGN_EXTRACT:
8222 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8223 return true;
8225 case CONST_INT:
8226 if (const_ok_for_arm (INTVAL (x))
8227 || const_ok_for_arm (~INTVAL (x)))
8228 *total = COSTS_N_INSNS (1);
8229 else
8230 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8231 INTVAL (x), NULL_RTX,
8232 NULL_RTX, 0, 0));
8233 return true;
8235 case CONST:
8236 case LABEL_REF:
8237 case SYMBOL_REF:
8238 *total = COSTS_N_INSNS (3);
8239 return true;
8241 case HIGH:
8242 *total = COSTS_N_INSNS (1);
8243 return true;
8245 case LO_SUM:
8246 *total = COSTS_N_INSNS (1);
8247 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8248 return true;
8250 case CONST_DOUBLE:
8251 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8252 && (mode == SFmode || !TARGET_VFP_SINGLE))
8253 *total = COSTS_N_INSNS (1);
8254 else
8255 *total = COSTS_N_INSNS (4);
8256 return true;
8258 case SET:
8259 /* The vec_extract patterns accept memory operands that require an
8260 address reload. Account for the cost of that reload to give the
8261 auto-inc-dec pass an incentive to try to replace them. */
8262 if (TARGET_NEON && MEM_P (SET_DEST (x))
8263 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8265 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8266 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8267 *total += COSTS_N_INSNS (1);
8268 return true;
8270 /* Likewise for the vec_set patterns. */
8271 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8272 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8273 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8275 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8276 *total = rtx_cost (mem, code, 0, speed);
8277 if (!neon_vector_mem_operand (mem, 2, true))
8278 *total += COSTS_N_INSNS (1);
8279 return true;
8281 return false;
8283 case UNSPEC:
8284       /* We cost this as high as our memory costs so that it can be
8285 	 hoisted out of loops.  */
8286 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8288 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8290 return true;
8292 case CONST_VECTOR:
8293 if (TARGET_NEON
8294 && TARGET_HARD_FLOAT
8295 && outer == SET
8296 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8297 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8298 *total = COSTS_N_INSNS (1);
8299 else
8300 *total = COSTS_N_INSNS (4);
8301 return true;
8303 default:
8304 *total = COSTS_N_INSNS (4);
8305 return false;
8309 /* Estimates the size cost of thumb1 instructions.
8310 For now most of the code is copied from thumb1_rtx_costs. We need more
8311    fine-grained tuning when we have more related test cases.  */
8312 static inline int
8313 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8315 enum machine_mode mode = GET_MODE (x);
8316 int words;
8318 switch (code)
8320 case ASHIFT:
8321 case ASHIFTRT:
8322 case LSHIFTRT:
8323 case ROTATERT:
8324 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8326 case PLUS:
8327 case MINUS:
8328       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8329 	 shiftsub1 patterns defined by RTL expansion, especially for the
8330 	 expansion of multiplication.  */
8331 if ((GET_CODE (XEXP (x, 0)) == MULT
8332 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8333 || (GET_CODE (XEXP (x, 1)) == MULT
8334 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8335 return COSTS_N_INSNS (2);
8336       /* Deliberately fall through for the normal RTX case.  */
8337 case COMPARE:
8338 case NEG:
8339 case NOT:
8340 return COSTS_N_INSNS (1);
8342 case MULT:
8343 if (CONST_INT_P (XEXP (x, 1)))
8345 	  /* The Thumb-1 mul instruction cannot operate on a constant.  We
8346 	     must load it into a register first.  */
8347 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8348 return COSTS_N_INSNS (1) + const_size;
8350 return COSTS_N_INSNS (1);
8352 case SET:
8353 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8354 the mode. */
8355 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8356 return (COSTS_N_INSNS (words)
8357 + 4 * ((MEM_P (SET_SRC (x)))
8358 + MEM_P (SET_DEST (x))));
8360 case CONST_INT:
8361 if (outer == SET)
8363 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8364 return COSTS_N_INSNS (1);
8365 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8366 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8367 return COSTS_N_INSNS (2);
8368 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8369 if (thumb_shiftable_const (INTVAL (x)))
8370 return COSTS_N_INSNS (2);
8371 return COSTS_N_INSNS (3);
8373 else if ((outer == PLUS || outer == COMPARE)
8374 && INTVAL (x) < 256 && INTVAL (x) > -256)
8375 return 0;
8376 else if ((outer == IOR || outer == XOR || outer == AND)
8377 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8378 return COSTS_N_INSNS (1);
8379 else if (outer == AND)
8381 int i;
8382 /* This duplicates the tests in the andsi3 expander. */
8383 for (i = 9; i <= 31; i++)
8384 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8385 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8386 return COSTS_N_INSNS (2);
8388 else if (outer == ASHIFT || outer == ASHIFTRT
8389 || outer == LSHIFTRT)
8390 return 0;
8391 return COSTS_N_INSNS (2);
8393 case CONST:
8394 case CONST_DOUBLE:
8395 case LABEL_REF:
8396 case SYMBOL_REF:
8397 return COSTS_N_INSNS (3);
8399 case UDIV:
8400 case UMOD:
8401 case DIV:
8402 case MOD:
8403 return 100;
8405 case TRUNCATE:
8406 return 99;
8408 case AND:
8409 case XOR:
8410 case IOR:
8411 /* XXX guess. */
8412 return 8;
8414 case MEM:
8415 /* XXX another guess. */
8416 /* Memory costs quite a lot for the first word, but subsequent words
8417 load at the equivalent of a single insn each. */
8418 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8419 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8420 ? 4 : 0));
8422 case IF_THEN_ELSE:
8423 /* XXX a guess. */
8424 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8425 return 14;
8426 return 2;
8428 case ZERO_EXTEND:
8429 /* XXX still guessing. */
8430 switch (GET_MODE (XEXP (x, 0)))
8432 case QImode:
8433 return (1 + (mode == DImode ? 4 : 0)
8434 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8436 case HImode:
8437 return (4 + (mode == DImode ? 4 : 0)
8438 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8440 case SImode:
8441 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8443 default:
8444 return 99;
8447 default:
8448 return 99;
8452 /* RTX costs when optimizing for size. */
8453 static bool
8454 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8455 int *total)
8457 enum machine_mode mode = GET_MODE (x);
8458 if (TARGET_THUMB1)
8460 *total = thumb1_size_rtx_costs (x, code, outer_code);
8461 return true;
8464 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8465 switch (code)
8467 case MEM:
8468 /* A memory access costs 1 insn if the mode is small, or the address is
8469 a single register, otherwise it costs one insn per word. */
8470 if (REG_P (XEXP (x, 0)))
8471 *total = COSTS_N_INSNS (1);
8472 else if (flag_pic
8473 && GET_CODE (XEXP (x, 0)) == PLUS
8474 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8475 /* This will be split into two instructions.
8476 See arm.md:calculate_pic_address. */
8477 *total = COSTS_N_INSNS (2);
8478 else
8479 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8480 return true;
8482 case DIV:
8483 case MOD:
8484 case UDIV:
8485 case UMOD:
8486 /* Needs a libcall, so it costs about this. */
8487 *total = COSTS_N_INSNS (2);
8488 return false;
8490 case ROTATE:
8491 if (mode == SImode && REG_P (XEXP (x, 1)))
8493 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8494 return true;
8496 /* Fall through */
8497 case ROTATERT:
8498 case ASHIFT:
8499 case LSHIFTRT:
8500 case ASHIFTRT:
8501 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8503 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8504 return true;
8506 else if (mode == SImode)
8508 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8509 /* Slightly disparage register shifts, but not by much. */
8510 if (!CONST_INT_P (XEXP (x, 1)))
8511 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8512 return true;
8515 /* Needs a libcall. */
8516 *total = COSTS_N_INSNS (2);
8517 return false;
8519 case MINUS:
8520 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8521 && (mode == SFmode || !TARGET_VFP_SINGLE))
8523 *total = COSTS_N_INSNS (1);
8524 return false;
8527 if (mode == SImode)
8529 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8530 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8532 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8533 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8534 || subcode1 == ROTATE || subcode1 == ROTATERT
8535 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8536 || subcode1 == ASHIFTRT)
8538 /* It's just the cost of the two operands. */
8539 *total = 0;
8540 return false;
8543 *total = COSTS_N_INSNS (1);
8544 return false;
8547 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8548 return false;
8550 case PLUS:
8551 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8552 && (mode == SFmode || !TARGET_VFP_SINGLE))
8554 *total = COSTS_N_INSNS (1);
8555 return false;
8558 /* A shift as a part of ADD costs nothing. */
8559 if (GET_CODE (XEXP (x, 0)) == MULT
8560 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8562 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8563 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8564 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8565 return true;
8568 /* Fall through */
8569 case AND: case XOR: case IOR:
8570 if (mode == SImode)
8572 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8574 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8575 || subcode == LSHIFTRT || subcode == ASHIFTRT
8576 || (code == AND && subcode == NOT))
8578 /* It's just the cost of the two operands. */
8579 *total = 0;
8580 return false;
8584 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8585 return false;
8587 case MULT:
8588 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8589 return false;
8591 case NEG:
8592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8593 && (mode == SFmode || !TARGET_VFP_SINGLE))
8595 *total = COSTS_N_INSNS (1);
8596 return false;
8599 /* Fall through */
8600 case NOT:
8601 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8603 return false;
8605 case IF_THEN_ELSE:
8606 *total = 0;
8607 return false;
8609 case COMPARE:
8610 if (cc_register (XEXP (x, 0), VOIDmode))
8611 * total = 0;
8612 else
8613 *total = COSTS_N_INSNS (1);
8614 return false;
8616 case ABS:
8617 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8618 && (mode == SFmode || !TARGET_VFP_SINGLE))
8619 *total = COSTS_N_INSNS (1);
8620 else
8621 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8622 return false;
8624 case SIGN_EXTEND:
8625 case ZERO_EXTEND:
8626 return arm_rtx_costs_1 (x, outer_code, total, 0);
8628 case CONST_INT:
8629 if (const_ok_for_arm (INTVAL (x)))
8630 /* A multiplication by a constant requires another instruction
8631 to load the constant to a register. */
8632 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8633 ? 1 : 0);
8634 else if (const_ok_for_arm (~INTVAL (x)))
8635 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8636 else if (const_ok_for_arm (-INTVAL (x)))
8638 if (outer_code == COMPARE || outer_code == PLUS
8639 || outer_code == MINUS)
8640 *total = 0;
8641 else
8642 *total = COSTS_N_INSNS (1);
8644 else
8645 *total = COSTS_N_INSNS (2);
8646 return true;
8648 case CONST:
8649 case LABEL_REF:
8650 case SYMBOL_REF:
8651 *total = COSTS_N_INSNS (2);
8652 return true;
8654 case CONST_DOUBLE:
8655 *total = COSTS_N_INSNS (4);
8656 return true;
8658 case CONST_VECTOR:
8659 if (TARGET_NEON
8660 && TARGET_HARD_FLOAT
8661 && outer_code == SET
8662 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8663 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8664 *total = COSTS_N_INSNS (1);
8665 else
8666 *total = COSTS_N_INSNS (4);
8667 return true;
8669 case HIGH:
8670 case LO_SUM:
8671 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8672 cost of these slightly. */
8673 *total = COSTS_N_INSNS (1) + 1;
8674 return true;
8676 case SET:
8677 return false;
8679 default:
8680 if (mode != VOIDmode)
8681 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8682 else
8683 	*total = COSTS_N_INSNS (4); /* Who knows?  */
8684 return false;
8688 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8689 operand, then return the operand that is being shifted. If the shift
8690 is not by a constant, then set SHIFT_REG to point to the operand.
8691 Return NULL if OP is not a shifter operand. */
8692 static rtx
8693 shifter_op_p (rtx op, rtx *shift_reg)
8695 enum rtx_code code = GET_CODE (op);
8697 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8698 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8699 return XEXP (op, 0);
8700 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8701 return XEXP (op, 0);
8702 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8703 || code == ASHIFTRT)
8705 if (!CONST_INT_P (XEXP (op, 1)))
8706 *shift_reg = XEXP (op, 1);
8707 return XEXP (op, 0);
8710 return NULL;
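  /* Editorial usage note (not in the original source): for
     (mult (reg r1) (const_int 4)) this function returns r1, treating the
     multiply as a left shift by two; for (ashift (reg r1) (reg r2)) it
     returns r1 and sets *SHIFT_REG to r2.  */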
8713 static bool
8714 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8716 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8717 gcc_assert (GET_CODE (x) == UNSPEC);
8719 switch (XINT (x, 1))
8721 case UNSPEC_UNALIGNED_LOAD:
8722 /* We can only do unaligned loads into the integer unit, and we can't
8723 use LDM or LDRD. */
8724 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8725 if (speed_p)
8726 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8727 + extra_cost->ldst.load_unaligned);
8729 #ifdef NOT_YET
8730 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8731 ADDR_SPACE_GENERIC, speed_p);
8732 #endif
8733 return true;
8735 case UNSPEC_UNALIGNED_STORE:
8736 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8737 if (speed_p)
8738 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
8739 + extra_cost->ldst.store_unaligned);
8741 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
8742 #ifdef NOT_YET
8743 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8744 ADDR_SPACE_GENERIC, speed_p);
8745 #endif
8746 return true;
8748 case UNSPEC_VRINTZ:
8749 case UNSPEC_VRINTP:
8750 case UNSPEC_VRINTM:
8751 case UNSPEC_VRINTR:
8752 case UNSPEC_VRINTX:
8753 case UNSPEC_VRINTA:
8754 *cost = COSTS_N_INSNS (1);
8755 if (speed_p)
8756 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
8758 return true;
8759 default:
8760 *cost = COSTS_N_INSNS (2);
8761 break;
8763 return false;
8766 /* Cost of a libcall. We assume one insn per argument, an amount for the
8767 call (one insn for -Os) and then one for processing the result. */
8768 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
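/* Editorial example: a two-argument libcall is costed as
   LIBCALL_COST (2) == COSTS_N_INSNS (20) when optimizing for speed and
   COSTS_N_INSNS (4) at -Os.  */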
8770 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
8771 do \
8773 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
8774 if (shift_op != NULL \
8775 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
8777 if (shift_reg) \
8779 if (speed_p) \
8780 *cost += extra_cost->alu.arith_shift_reg; \
8781 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
8783 else if (speed_p) \
8784 *cost += extra_cost->alu.arith_shift; \
8786 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
8787 + rtx_cost (XEXP (x, 1 - IDX), \
8788 OP, 1, speed_p)); \
8789 return true; \
8792 while (0);
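/* Editorial note: for instance, HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks
   whether XEXP (x, 0) is a left shift (possibly by a register), adds the
   corresponding arith_shift or arith_shift_reg cost plus the operand costs,
   and executes `return true' from the enclosing cost function when it
   matches.  */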
8794 /* RTX costs. Make an estimate of the cost of executing the operation
8795    X, which is contained within an operation with code OUTER_CODE.
8796 SPEED_P indicates whether the cost desired is the performance cost,
8797 or the size cost. The estimate is stored in COST and the return
8798 value is TRUE if the cost calculation is final, or FALSE if the
8799 caller should recurse through the operands of X to add additional
8800 costs.
8802 We currently make no attempt to model the size savings of Thumb-2
8803 16-bit instructions. At the normal points in compilation where
8804 this code is called we have no measure of whether the condition
8805 flags are live or not, and thus no realistic way to determine what
8806 the size will eventually be. */
8807 static bool
8808 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8809 const struct cpu_cost_table *extra_cost,
8810 int *cost, bool speed_p)
8812 enum machine_mode mode = GET_MODE (x);
8814 if (TARGET_THUMB1)
8816 if (speed_p)
8817 *cost = thumb1_rtx_costs (x, code, outer_code);
8818 else
8819 *cost = thumb1_size_rtx_costs (x, code, outer_code);
8820 return true;
8823 switch (code)
8825 case SET:
8826 *cost = 0;
8827 if (REG_P (SET_SRC (x))
8828 && REG_P (SET_DEST (x)))
8830 /* Assume that most copies can be done with a single insn,
8831 unless we don't have HW FP, in which case everything
8832 larger than word mode will require two insns. */
8833 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8834 && GET_MODE_SIZE (mode) > 4)
8835 || mode == DImode)
8836 ? 2 : 1);
8837 /* Conditional register moves can be encoded
8838 in 16 bits in Thumb mode. */
8839 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
8840 *cost >>= 1;
8843 if (CONST_INT_P (SET_SRC (x)))
8845 /* Handle CONST_INT here, since the value doesn't have a mode
8846 and we would otherwise be unable to work out the true cost. */
8847 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
8848 mode = GET_MODE (SET_DEST (x));
8849 outer_code = SET;
8850 /* Slightly lower the cost of setting a core reg to a constant.
8851 This helps break up chains and allows for better scheduling. */
8852 if (REG_P (SET_DEST (x))
8853 && REGNO (SET_DEST (x)) <= LR_REGNUM)
8854 *cost -= 1;
8855 x = SET_SRC (x);
8856 /* Immediate moves with an immediate in the range [0, 255] can be
8857 encoded in 16 bits in Thumb mode. */
8858 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
8859 && INTVAL (x) >= 0 && INTVAL (x) <=255)
8860 *cost >>= 1;
8861 goto const_int_cost;
8864 return false;
8866 case MEM:
8867 /* A memory access costs 1 insn if the mode is small, or the address is
8868 a single register, otherwise it costs one insn per word. */
8869 if (REG_P (XEXP (x, 0)))
8870 *cost = COSTS_N_INSNS (1);
8871 else if (flag_pic
8872 && GET_CODE (XEXP (x, 0)) == PLUS
8873 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8874 /* This will be split into two instructions.
8875 See arm.md:calculate_pic_address. */
8876 *cost = COSTS_N_INSNS (2);
8877 else
8878 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8880 /* For speed optimizations, add the costs of the address and
8881 accessing memory. */
8882 if (speed_p)
8883 #ifdef NOT_YET
8884 *cost += (extra_cost->ldst.load
8885 + arm_address_cost (XEXP (x, 0), mode,
8886 ADDR_SPACE_GENERIC, speed_p));
8887 #else
8888 *cost += extra_cost->ldst.load;
8889 #endif
8890 return true;
8892 case PARALLEL:
8894 /* Calculations of LDM costs are complex. We assume an initial cost
8895 	 (ldm_1st) which covers loading the first
8896 	 ldm_regs_per_insn_1st registers; then each additional
8897 ldm_regs_per_insn_subsequent registers cost one more insn. The
8898 formula for N regs is thus:
8900 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8901 + ldm_regs_per_insn_subsequent - 1)
8902 / ldm_regs_per_insn_subsequent).
8904 Additional costs may also be added for addressing. A similar
8905 formula is used for STM. */
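      /* Editorial worked example with illustrative tuning values (the real
	 values come from the per-CPU cost tables): with
	 ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
	 an 8-register LDM costs
	   ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
	   == ldm_1st + COSTS_N_INSNS (3).  */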
8907 bool is_ldm = load_multiple_operation (x, SImode);
8908 bool is_stm = store_multiple_operation (x, SImode);
8910 *cost = COSTS_N_INSNS (1);
8912 if (is_ldm || is_stm)
8914 if (speed_p)
8916 HOST_WIDE_INT nregs = XVECLEN (x, 0);
8917 HOST_WIDE_INT regs_per_insn_1st = is_ldm
8918 ? extra_cost->ldst.ldm_regs_per_insn_1st
8919 : extra_cost->ldst.stm_regs_per_insn_1st;
8920 HOST_WIDE_INT regs_per_insn_sub = is_ldm
8921 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
8922 : extra_cost->ldst.stm_regs_per_insn_subsequent;
8924 *cost += regs_per_insn_1st
8925 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
8926 + regs_per_insn_sub - 1)
8927 / regs_per_insn_sub);
8928 return true;
8932 return false;
8934 case DIV:
8935 case UDIV:
8936 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8937 && (mode == SFmode || !TARGET_VFP_SINGLE))
8938 *cost = COSTS_N_INSNS (speed_p
8939 ? extra_cost->fp[mode != SFmode].div : 1);
8940 else if (mode == SImode && TARGET_IDIV)
8941 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
8942 else
8943 *cost = LIBCALL_COST (2);
8944 return false; /* All arguments must be in registers. */
8946 case MOD:
8947 case UMOD:
8948 *cost = LIBCALL_COST (2);
8949 return false; /* All arguments must be in registers. */
8951 case ROTATE:
8952 if (mode == SImode && REG_P (XEXP (x, 1)))
8954 *cost = (COSTS_N_INSNS (2)
8955 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8956 if (speed_p)
8957 *cost += extra_cost->alu.shift_reg;
8958 return true;
8960 /* Fall through */
8961 case ROTATERT:
8962 case ASHIFT:
8963 case LSHIFTRT:
8964 case ASHIFTRT:
8965 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8967 *cost = (COSTS_N_INSNS (3)
8968 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8969 if (speed_p)
8970 *cost += 2 * extra_cost->alu.shift;
8971 return true;
8973 else if (mode == SImode)
8975 *cost = (COSTS_N_INSNS (1)
8976 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8977 /* Slightly disparage register shifts at -Os, but not by much. */
8978 if (!CONST_INT_P (XEXP (x, 1)))
8979 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8980 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
8981 return true;
8983 else if (GET_MODE_CLASS (mode) == MODE_INT
8984 && GET_MODE_SIZE (mode) < 4)
8986 if (code == ASHIFT)
8988 *cost = (COSTS_N_INSNS (1)
8989 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
8990 /* Slightly disparage register shifts at -Os, but not by
8991 much. */
8992 if (!CONST_INT_P (XEXP (x, 1)))
8993 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
8994 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
8996 else if (code == LSHIFTRT || code == ASHIFTRT)
8998 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9000 /* Can use SBFX/UBFX. */
9001 *cost = COSTS_N_INSNS (1);
9002 if (speed_p)
9003 *cost += extra_cost->alu.bfx;
9004 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9006 else
9008 *cost = COSTS_N_INSNS (2);
9009 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9010 if (speed_p)
9012 if (CONST_INT_P (XEXP (x, 1)))
9013 *cost += 2 * extra_cost->alu.shift;
9014 else
9015 *cost += (extra_cost->alu.shift
9016 + extra_cost->alu.shift_reg);
9018 else
9019 /* Slightly disparage register shifts. */
9020 *cost += !CONST_INT_P (XEXP (x, 1));
9023 else /* Rotates. */
9025 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9026 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9027 if (speed_p)
9029 if (CONST_INT_P (XEXP (x, 1)))
9030 *cost += (2 * extra_cost->alu.shift
9031 + extra_cost->alu.log_shift);
9032 else
9033 *cost += (extra_cost->alu.shift
9034 + extra_cost->alu.shift_reg
9035 + extra_cost->alu.log_shift_reg);
9038 return true;
9041 *cost = LIBCALL_COST (2);
9042 return false;
9044 case MINUS:
9045 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9046 && (mode == SFmode || !TARGET_VFP_SINGLE))
9048 *cost = COSTS_N_INSNS (1);
9049 if (GET_CODE (XEXP (x, 0)) == MULT
9050 || GET_CODE (XEXP (x, 1)) == MULT)
9052 rtx mul_op0, mul_op1, sub_op;
9054 if (speed_p)
9055 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9057 if (GET_CODE (XEXP (x, 0)) == MULT)
9059 mul_op0 = XEXP (XEXP (x, 0), 0);
9060 mul_op1 = XEXP (XEXP (x, 0), 1);
9061 sub_op = XEXP (x, 1);
9063 else
9065 mul_op0 = XEXP (XEXP (x, 1), 0);
9066 mul_op1 = XEXP (XEXP (x, 1), 1);
9067 sub_op = XEXP (x, 0);
9070 /* The first operand of the multiply may be optionally
9071 negated. */
9072 if (GET_CODE (mul_op0) == NEG)
9073 mul_op0 = XEXP (mul_op0, 0);
9075 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9076 + rtx_cost (mul_op1, code, 0, speed_p)
9077 + rtx_cost (sub_op, code, 0, speed_p));
9079 return true;
9082 if (speed_p)
9083 *cost += extra_cost->fp[mode != SFmode].addsub;
9084 return false;
9087 if (mode == SImode)
9089 rtx shift_by_reg = NULL;
9090 rtx shift_op;
9091 rtx non_shift_op;
9093 *cost = COSTS_N_INSNS (1);
9095 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9096 if (shift_op == NULL)
9098 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9099 non_shift_op = XEXP (x, 0);
9101 else
9102 non_shift_op = XEXP (x, 1);
9104 if (shift_op != NULL)
9106 if (shift_by_reg != NULL)
9108 if (speed_p)
9109 *cost += extra_cost->alu.arith_shift_reg;
9110 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9112 else if (speed_p)
9113 *cost += extra_cost->alu.arith_shift;
9115 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9116 + rtx_cost (non_shift_op, code, 0, speed_p));
9117 return true;
9120 if (arm_arch_thumb2
9121 && GET_CODE (XEXP (x, 1)) == MULT)
9123 /* MLS. */
9124 if (speed_p)
9125 *cost += extra_cost->mult[0].add;
9126 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9127 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9128 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9129 return true;
9132 if (CONST_INT_P (XEXP (x, 0)))
9134 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9135 INTVAL (XEXP (x, 0)), NULL_RTX,
9136 NULL_RTX, 1, 0);
9137 *cost = COSTS_N_INSNS (insns);
9138 if (speed_p)
9139 *cost += insns * extra_cost->alu.arith;
9140 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9141 return true;
9144 return false;
9147 if (GET_MODE_CLASS (mode) == MODE_INT
9148 && GET_MODE_SIZE (mode) < 4)
9150 rtx shift_op, shift_reg;
9151 shift_reg = NULL;
9153 /* We check both sides of the MINUS for shifter operands since,
9154 unlike PLUS, it's not commutative. */
9156 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9157 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9159 /* Slightly disparage, as we might need to widen the result. */
9160 *cost = 1 + COSTS_N_INSNS (1);
9161 if (speed_p)
9162 *cost += extra_cost->alu.arith;
9164 if (CONST_INT_P (XEXP (x, 0)))
9166 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9167 return true;
9170 return false;
9173 if (mode == DImode)
9175 *cost = COSTS_N_INSNS (2);
9177 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9179 rtx op1 = XEXP (x, 1);
9181 if (speed_p)
9182 *cost += 2 * extra_cost->alu.arith;
9184 if (GET_CODE (op1) == ZERO_EXTEND)
9185 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9186 else
9187 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9188 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9189 0, speed_p);
9190 return true;
9192 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9194 if (speed_p)
9195 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9196 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9197 0, speed_p)
9198 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9199 return true;
9201 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9202 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9204 if (speed_p)
9205 *cost += (extra_cost->alu.arith
9206 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9207 ? extra_cost->alu.arith
9208 : extra_cost->alu.arith_shift));
9209 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9210 + rtx_cost (XEXP (XEXP (x, 1), 0),
9211 GET_CODE (XEXP (x, 1)), 0, speed_p));
9212 return true;
9215 if (speed_p)
9216 *cost += 2 * extra_cost->alu.arith;
9217 return false;
9220 /* Vector mode? */
9222 *cost = LIBCALL_COST (2);
9223 return false;
9225 case PLUS:
9226 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9227 && (mode == SFmode || !TARGET_VFP_SINGLE))
9229 *cost = COSTS_N_INSNS (1);
9230 if (GET_CODE (XEXP (x, 0)) == MULT)
9232 rtx mul_op0, mul_op1, add_op;
9234 if (speed_p)
9235 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9237 mul_op0 = XEXP (XEXP (x, 0), 0);
9238 mul_op1 = XEXP (XEXP (x, 0), 1);
9239 add_op = XEXP (x, 1);
9241 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9242 + rtx_cost (mul_op1, code, 0, speed_p)
9243 + rtx_cost (add_op, code, 0, speed_p));
9245 return true;
9248 if (speed_p)
9249 *cost += extra_cost->fp[mode != SFmode].addsub;
9250 return false;
9252 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9254 *cost = LIBCALL_COST (2);
9255 return false;
9258 /* Narrow modes can be synthesized in SImode, but the range
9259 of useful sub-operations is limited. Check for shift operations
9260 on one of the operands. Only left shifts can be used in the
9261 narrow modes. */
9262 if (GET_MODE_CLASS (mode) == MODE_INT
9263 && GET_MODE_SIZE (mode) < 4)
9265 rtx shift_op, shift_reg;
9266 shift_reg = NULL;
9268 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9270 if (CONST_INT_P (XEXP (x, 1)))
9272 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9273 INTVAL (XEXP (x, 1)), NULL_RTX,
9274 NULL_RTX, 1, 0);
9275 *cost = COSTS_N_INSNS (insns);
9276 if (speed_p)
9277 *cost += insns * extra_cost->alu.arith;
9278 /* Slightly penalize a narrow operation as the result may
9279 need widening. */
9280 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9281 return true;
9284 /* Slightly penalize a narrow operation as the result may
9285 need widening. */
9286 *cost = 1 + COSTS_N_INSNS (1);
9287 if (speed_p)
9288 *cost += extra_cost->alu.arith;
9290 return false;
9293 if (mode == SImode)
9295 rtx shift_op, shift_reg;
9297 *cost = COSTS_N_INSNS (1);
9298 if (TARGET_INT_SIMD
9299 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9300 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9302 /* UXTA[BH] or SXTA[BH]. */
9303 if (speed_p)
9304 *cost += extra_cost->alu.extnd_arith;
9305 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9306 speed_p)
9307 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9308 return true;
9311 shift_reg = NULL;
9312 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9313 if (shift_op != NULL)
9315 if (shift_reg)
9317 if (speed_p)
9318 *cost += extra_cost->alu.arith_shift_reg;
9319 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9321 else if (speed_p)
9322 *cost += extra_cost->alu.arith_shift;
9324 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9325 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9326 return true;
9328 if (GET_CODE (XEXP (x, 0)) == MULT)
9330 rtx mul_op = XEXP (x, 0);
9332 *cost = COSTS_N_INSNS (1);
9334 if (TARGET_DSP_MULTIPLY
9335 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9336 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9337 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9338 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9339 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9340 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9341 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9342 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9343 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9344 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9345 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9346 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9347 == 16))))))
9349 /* SMLA[BT][BT]. */
9350 if (speed_p)
9351 *cost += extra_cost->mult[0].extend_add;
9352 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9353 SIGN_EXTEND, 0, speed_p)
9354 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9355 SIGN_EXTEND, 0, speed_p)
9356 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9357 return true;
9360 if (speed_p)
9361 *cost += extra_cost->mult[0].add;
9362 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9363 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9364 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9365 return true;
9367 if (CONST_INT_P (XEXP (x, 1)))
9369 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9370 INTVAL (XEXP (x, 1)), NULL_RTX,
9371 NULL_RTX, 1, 0);
9372 *cost = COSTS_N_INSNS (insns);
9373 if (speed_p)
9374 *cost += insns * extra_cost->alu.arith;
9375 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9376 return true;
9378 return false;
9381 if (mode == DImode)
9383 if (arm_arch3m
9384 && GET_CODE (XEXP (x, 0)) == MULT
9385 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9386 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9387 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9388 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9390 *cost = COSTS_N_INSNS (1);
9391 if (speed_p)
9392 *cost += extra_cost->mult[1].extend_add;
9393 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9394 ZERO_EXTEND, 0, speed_p)
9395 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9396 ZERO_EXTEND, 0, speed_p)
9397 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9398 return true;
9401 *cost = COSTS_N_INSNS (2);
9403 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9404 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9406 if (speed_p)
9407 *cost += (extra_cost->alu.arith
9408 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9409 ? extra_cost->alu.arith
9410 : extra_cost->alu.arith_shift));
9412 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9413 speed_p)
9414 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9415 return true;
9418 if (speed_p)
9419 *cost += 2 * extra_cost->alu.arith;
9420 return false;
9423 /* Vector mode? */
9424 *cost = LIBCALL_COST (2);
9425 return false;
9427 case AND: case XOR: case IOR:
9428 if (mode == SImode)
9430 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9431 rtx op0 = XEXP (x, 0);
9432 rtx shift_op, shift_reg;
9434 *cost = COSTS_N_INSNS (1);
9436 if (subcode == NOT
9437 && (code == AND
9438 || (code == IOR && TARGET_THUMB2)))
9439 op0 = XEXP (op0, 0);
9441 shift_reg = NULL;
9442 shift_op = shifter_op_p (op0, &shift_reg);
9443 if (shift_op != NULL)
9445 if (shift_reg)
9447 if (speed_p)
9448 *cost += extra_cost->alu.log_shift_reg;
9449 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9451 else if (speed_p)
9452 *cost += extra_cost->alu.log_shift;
9454 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9455 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9456 return true;
9459 if (CONST_INT_P (XEXP (x, 1)))
9461 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9462 INTVAL (XEXP (x, 1)), NULL_RTX,
9463 NULL_RTX, 1, 0);
9465 *cost = COSTS_N_INSNS (insns);
9466 if (speed_p)
9467 *cost += insns * extra_cost->alu.logical;
9468 *cost += rtx_cost (op0, code, 0, speed_p);
9469 return true;
9472 if (speed_p)
9473 *cost += extra_cost->alu.logical;
9474 *cost += (rtx_cost (op0, code, 0, speed_p)
9475 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9476 return true;
9479 if (mode == DImode)
9481 rtx op0 = XEXP (x, 0);
9482 enum rtx_code subcode = GET_CODE (op0);
9484 *cost = COSTS_N_INSNS (2);
9486 if (subcode == NOT
9487 && (code == AND
9488 || (code == IOR && TARGET_THUMB2)))
9489 op0 = XEXP (op0, 0);
9491 if (GET_CODE (op0) == ZERO_EXTEND)
9493 if (speed_p)
9494 *cost += 2 * extra_cost->alu.logical;
9496 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9497 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9498 return true;
9500 else if (GET_CODE (op0) == SIGN_EXTEND)
9502 if (speed_p)
9503 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9505 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9506 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9507 return true;
9510 if (speed_p)
9511 *cost += 2 * extra_cost->alu.logical;
9513 return true;
9515 /* Vector mode? */
9517 *cost = LIBCALL_COST (2);
9518 return false;
9520 case MULT:
9521 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9522 && (mode == SFmode || !TARGET_VFP_SINGLE))
9524 rtx op0 = XEXP (x, 0);
9526 *cost = COSTS_N_INSNS (1);
9528 if (GET_CODE (op0) == NEG)
9529 op0 = XEXP (op0, 0);
9531 if (speed_p)
9532 *cost += extra_cost->fp[mode != SFmode].mult;
9534 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9535 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9536 return true;
9538 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9540 *cost = LIBCALL_COST (2);
9541 return false;
9544 if (mode == SImode)
9546 *cost = COSTS_N_INSNS (1);
9547 if (TARGET_DSP_MULTIPLY
9548 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9549 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9550 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9551 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9552 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9553 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9554 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9555 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9556 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9557 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9558 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9559 && (INTVAL (XEXP (XEXP (x, 1), 1))
9560 == 16))))))
9562 /* SMUL[TB][TB]. */
9563 if (speed_p)
9564 *cost += extra_cost->mult[0].extend;
9565 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9566 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9567 return true;
9569 if (speed_p)
9570 *cost += extra_cost->mult[0].simple;
9571 return false;
9574 if (mode == DImode)
9576 if (arm_arch3m
9577 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9578 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9579 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9580 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9582 *cost = COSTS_N_INSNS (1);
9583 if (speed_p)
9584 *cost += extra_cost->mult[1].extend;
9585 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9586 ZERO_EXTEND, 0, speed_p)
9587 + rtx_cost (XEXP (XEXP (x, 1), 0),
9588 ZERO_EXTEND, 0, speed_p));
9589 return true;
9592 *cost = LIBCALL_COST (2);
9593 return false;
9596 /* Vector mode? */
9597 *cost = LIBCALL_COST (2);
9598 return false;
9600 case NEG:
9601 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9602 && (mode == SFmode || !TARGET_VFP_SINGLE))
9604 *cost = COSTS_N_INSNS (1);
9605 if (speed_p)
9606 *cost += extra_cost->fp[mode != SFmode].neg;
9608 return false;
9610 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9612 *cost = LIBCALL_COST (1);
9613 return false;
9616 if (mode == SImode)
9618 if (GET_CODE (XEXP (x, 0)) == ABS)
9620 *cost = COSTS_N_INSNS (2);
9621 /* Assume the non-flag-changing variant. */
9622 if (speed_p)
9623 *cost += (extra_cost->alu.log_shift
9624 + extra_cost->alu.arith_shift);
9625 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9626 return true;
9629 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9630 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9632 *cost = COSTS_N_INSNS (2);
9633 /* No extra cost for MOV imm and MVN imm. */
9634 /* If the comparison op is using the flags, there's no further
9635 cost; otherwise we need to add the cost of the comparison. */
9636 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9637 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9638 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9640 *cost += (COSTS_N_INSNS (1)
9641 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9642 speed_p)
9643 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9644 speed_p));
9645 if (speed_p)
9646 *cost += extra_cost->alu.arith;
9648 return true;
9650 *cost = COSTS_N_INSNS (1);
9651 if (speed_p)
9652 *cost += extra_cost->alu.arith;
9653 return false;
9656 if (GET_MODE_CLASS (mode) == MODE_INT
9657 && GET_MODE_SIZE (mode) < 4)
9659 /* Slightly disparage, as we might need an extend operation. */
9660 *cost = 1 + COSTS_N_INSNS (1);
9661 if (speed_p)
9662 *cost += extra_cost->alu.arith;
9663 return false;
9666 if (mode == DImode)
9668 *cost = COSTS_N_INSNS (2);
9669 if (speed_p)
9670 *cost += 2 * extra_cost->alu.arith;
9671 return false;
9674 /* Vector mode? */
9675 *cost = LIBCALL_COST (1);
9676 return false;
9678 case NOT:
9679 if (mode == SImode)
9681 rtx shift_op;
9682 rtx shift_reg = NULL;
9684 *cost = COSTS_N_INSNS (1);
9685 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9687 if (shift_op)
9689 if (shift_reg != NULL)
9691 if (speed_p)
9692 *cost += extra_cost->alu.log_shift_reg;
9693 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9695 else if (speed_p)
9696 *cost += extra_cost->alu.log_shift;
9697 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9698 return true;
9701 if (speed_p)
9702 *cost += extra_cost->alu.logical;
9703 return false;
9705 if (mode == DImode)
9707 *cost = COSTS_N_INSNS (2);
9708 return false;
9711 /* Vector mode? */
9713 *cost += LIBCALL_COST (1);
9714 return false;
9716 case IF_THEN_ELSE:
9718 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9720 *cost = COSTS_N_INSNS (4);
9721 return true;
9723 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9724 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9726 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9727 /* Assume that if one arm of the if_then_else is a register, it
9728 will be tied with the result, eliminating the conditional
9729 insn. */
9730 if (REG_P (XEXP (x, 1)))
9731 *cost += op2cost;
9732 else if (REG_P (XEXP (x, 2)))
9733 *cost += op1cost;
9734 else
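/* Neither arm ties with the result.  When optimizing for speed, cores where
   non-executed (conditionalized) insns cost as much as executed ones pay for
   both arms plus the conditionalization overhead; other cores pay only for
   the longer arm.  For size, both arms are counted.  */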
9736 if (speed_p)
9738 if (extra_cost->alu.non_exec_costs_exec)
9739 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
9740 else
9741 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
9743 else
9744 *cost += op1cost + op2cost;
9747 return true;
9749 case COMPARE:
9750 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
9751 *cost = 0;
9752 else
9754 enum machine_mode op0mode;
9755 /* We'll mostly assume that the cost of a compare is the cost of the
9756 LHS. However, there are some notable exceptions. */
9758 /* Floating point compares are never done as side-effects. */
9759 op0mode = GET_MODE (XEXP (x, 0));
9760 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
9761 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
9763 *cost = COSTS_N_INSNS (1);
9764 if (speed_p)
9765 *cost += extra_cost->fp[op0mode != SFmode].compare;
9767 if (XEXP (x, 1) == CONST0_RTX (op0mode))
9769 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9770 return true;
9773 return false;
9775 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
9777 *cost = LIBCALL_COST (2);
9778 return false;
9781 /* DImode compares normally take two insns. */
9782 if (op0mode == DImode)
9784 *cost = COSTS_N_INSNS (2);
9785 if (speed_p)
9786 *cost += 2 * extra_cost->alu.arith;
9787 return false;
9790 if (op0mode == SImode)
9792 rtx shift_op;
9793 rtx shift_reg;
9795 if (XEXP (x, 1) == const0_rtx
9796 && !(REG_P (XEXP (x, 0))
9797 || (GET_CODE (XEXP (x, 0)) == SUBREG
9798 && REG_P (SUBREG_REG (XEXP (x, 0))))))
9800 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
9802 /* Multiply operations that set the flags are often
9803 significantly more expensive. */
9804 if (speed_p
9805 && GET_CODE (XEXP (x, 0)) == MULT
9806 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
9807 *cost += extra_cost->mult[0].flag_setting;
9809 if (speed_p
9810 && GET_CODE (XEXP (x, 0)) == PLUS
9811 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9812 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
9813 0), 1), mode))
9814 *cost += extra_cost->mult[0].flag_setting;
9815 return true;
9818 shift_reg = NULL;
9819 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9820 if (shift_op != NULL)
9822 *cost = COSTS_N_INSNS (1);
9823 if (shift_reg != NULL)
9825 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9826 if (speed_p)
9827 *cost += extra_cost->alu.arith_shift_reg;
9829 else if (speed_p)
9830 *cost += extra_cost->alu.arith_shift;
9831 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9832 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
9833 return true;
9836 *cost = COSTS_N_INSNS (1);
9837 if (speed_p)
9838 *cost += extra_cost->alu.arith;
9839 if (CONST_INT_P (XEXP (x, 1))
9840 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9842 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
9843 return true;
9845 return false;
9848 /* Vector mode? */
9850 *cost = LIBCALL_COST (2);
9851 return false;
9853 return true;
9855 case EQ:
9856 case NE:
9857 case LT:
9858 case LE:
9859 case GT:
9860 case GE:
9861 case LTU:
9862 case LEU:
9863 case GEU:
9864 case GTU:
9865 case ORDERED:
9866 case UNORDERED:
9867 case UNEQ:
9868 case UNLE:
9869 case UNLT:
9870 case UNGE:
9871 case UNGT:
9872 case LTGT:
9873 if (outer_code == SET)
9875 /* Is it a store-flag operation? */
9876 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9877 && XEXP (x, 1) == const0_rtx)
9879 /* Thumb also needs an IT insn. */
9880 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
9881 return true;
9883 if (XEXP (x, 1) == const0_rtx)
9885 switch (code)
9887 case LT:
9888 /* LSR Rd, Rn, #31. */
9889 *cost = COSTS_N_INSNS (1);
9890 if (speed_p)
9891 *cost += extra_cost->alu.shift;
9892 break;
9894 case EQ:
9895 /* RSBS T1, Rn, #0
9896 ADC Rd, Rn, T1. */
9898 case NE:
9899 /* SUBS T1, Rn, #1
9900 SBC Rd, Rn, T1. */
9901 *cost = COSTS_N_INSNS (2);
9902 break;
9904 case LE:
9905 /* RSBS T1, Rn, Rn, LSR #31
9906 ADC Rd, Rn, T1. */
9907 *cost = COSTS_N_INSNS (2);
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith_shift;
9910 break;
9912 case GT:
9913 /* RSB Rd, Rn, Rn, ASR #1
9914 LSR Rd, Rd, #31. */
9915 *cost = COSTS_N_INSNS (2);
9916 if (speed_p)
9917 *cost += (extra_cost->alu.arith_shift
9918 + extra_cost->alu.shift);
9919 break;
9921 case GE:
9922 /* ASR Rd, Rn, #31
9923 ADD Rd, Rn, #1. */
9924 *cost = COSTS_N_INSNS (2);
9925 if (speed_p)
9926 *cost += extra_cost->alu.shift;
9927 break;
9929 default:
9930 /* Remaining cases are either meaningless or would take
9931 three insns anyway. */
9932 *cost = COSTS_N_INSNS (3);
9933 break;
9935 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9936 return true;
9938 else
9940 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
9941 if (CONST_INT_P (XEXP (x, 1))
9942 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
9944 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9945 return true;
9948 return false;
9951 /* Not directly inside a set. If it involves the condition code
9952 register, it must be the condition for a branch, cond_exec or
9953 IF_THEN_ELSE operation. Since the comparison is performed
9954 elsewhere, this is just the control part, which has no additional
9955 cost. */
9956 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
9957 && XEXP (x, 1) == const0_rtx)
9959 *cost = 0;
9960 return true;
9962 return false;
9964 case ABS:
9965 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9966 && (mode == SFmode || !TARGET_VFP_SINGLE))
9968 *cost = COSTS_N_INSNS (1);
9969 if (speed_p)
9970 *cost += extra_cost->fp[mode != SFmode].neg;
9972 return false;
9974 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9976 *cost = LIBCALL_COST (1);
9977 return false;
9980 if (mode == SImode)
9982 *cost = COSTS_N_INSNS (1);
9983 if (speed_p)
9984 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
9985 return false;
9987 /* Vector mode? */
9988 *cost = LIBCALL_COST (1);
9989 return false;
9991 case SIGN_EXTEND:
9992 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
9993 && MEM_P (XEXP (x, 0)))
9995 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
9997 if (mode == DImode)
9998 *cost += COSTS_N_INSNS (1);
10000 if (!speed_p)
10001 return true;
10003 if (GET_MODE (XEXP (x, 0)) == SImode)
10004 *cost += extra_cost->ldst.load;
10005 else
10006 *cost += extra_cost->ldst.load_sign_extend;
10008 if (mode == DImode)
10009 *cost += extra_cost->alu.shift;
10011 return true;
10014 /* Widening from less than 32-bits requires an extend operation. */
10015 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10017 /* We have SXTB/SXTH. */
10018 *cost = COSTS_N_INSNS (1);
10019 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10020 if (speed_p)
10021 *cost += extra_cost->alu.extnd;
10023 else if (GET_MODE (XEXP (x, 0)) != SImode)
10025 /* Needs two shifts. */
10026 *cost = COSTS_N_INSNS (2);
10027 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10028 if (speed_p)
10029 *cost += 2 * extra_cost->alu.shift;
10032 /* Widening beyond 32-bits requires one more insn. */
10033 if (mode == DImode)
10035 *cost += COSTS_N_INSNS (1);
10036 if (speed_p)
10037 *cost += extra_cost->alu.shift;
10040 return true;
10042 case ZERO_EXTEND:
10043 if ((arm_arch4
10044 || GET_MODE (XEXP (x, 0)) == SImode
10045 || GET_MODE (XEXP (x, 0)) == QImode)
10046 && MEM_P (XEXP (x, 0)))
10048 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10050 if (mode == DImode)
10051 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10053 return true;
10056 /* Widening from less than 32-bits requires an extend operation. */
10057 if (GET_MODE (XEXP (x, 0)) == QImode)
10059 /* UXTB can be a shorter instruction in Thumb2, but it might
10060 be slower than the AND Rd, Rn, #255 alternative. When
10061 optimizing for speed it should never be slower to use
10062 AND, and we don't really model 16-bit vs 32-bit insns
10063 here. */
10064 *cost = COSTS_N_INSNS (1);
10065 if (speed_p)
10066 *cost += extra_cost->alu.logical;
10068 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10070 /* We have UXTB/UXTH. */
10071 *cost = COSTS_N_INSNS (1);
10072 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10073 if (speed_p)
10074 *cost += extra_cost->alu.extnd;
10076 else if (GET_MODE (XEXP (x, 0)) != SImode)
10078 /* Needs two shifts. It's marginally preferable to use
10079 shifts rather than two BIC instructions as the second
10080 shift may merge with a subsequent insn as a shifter
10081 op. */
10082 *cost = COSTS_N_INSNS (2);
10083 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10084 if (speed_p)
10085 *cost += 2 * extra_cost->alu.shift;
10088 /* Widening beyond 32-bits requires one more insn. */
10089 if (mode == DImode)
10091 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10094 return true;
10096 case CONST_INT:
10097 *cost = 0;
10098 /* CONST_INT has no mode, so we cannot tell for sure how many
10099 insns are really going to be needed. The best we can do is
10100 look at the value passed. If it fits in SImode, then assume
10101 that's the mode it will be used for. Otherwise assume it
10102 will be used in DImode. */
10103 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10104 mode = SImode;
10105 else
10106 mode = DImode;
10108 /* Avoid blowing up in arm_gen_constant (). */
10109 if (!(outer_code == PLUS
10110 || outer_code == AND
10111 || outer_code == IOR
10112 || outer_code == XOR
10113 || outer_code == MINUS))
10114 outer_code = SET;
10116 const_int_cost:
10117 if (mode == SImode)
10119 *cost += 0;
10120 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10121 INTVAL (x), NULL, NULL,
10122 0, 0));
10123 /* Extra costs? */
10125 else
10127 *cost += COSTS_N_INSNS (arm_gen_constant
10128 (outer_code, SImode, NULL,
10129 trunc_int_for_mode (INTVAL (x), SImode),
10130 NULL, NULL, 0, 0)
10131 + arm_gen_constant (outer_code, SImode, NULL,
10132 INTVAL (x) >> 32, NULL,
10133 NULL, 0, 0));
10134 /* Extra costs? */
10137 return true;
10139 case CONST:
10140 case LABEL_REF:
10141 case SYMBOL_REF:
10142 if (speed_p)
10144 if (arm_arch_thumb2 && !flag_pic)
10145 *cost = COSTS_N_INSNS (2);
10146 else
10147 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10149 else
10150 *cost = COSTS_N_INSNS (2);
10152 if (flag_pic)
10154 *cost += COSTS_N_INSNS (1);
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10159 return true;
10161 case CONST_FIXED:
10162 *cost = COSTS_N_INSNS (4);
10163 /* Fixme. */
10164 return true;
10166 case CONST_DOUBLE:
10167 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10168 && (mode == SFmode || !TARGET_VFP_SINGLE))
10170 if (vfp3_const_double_rtx (x))
10172 *cost = COSTS_N_INSNS (1);
10173 if (speed_p)
10174 *cost += extra_cost->fp[mode == DFmode].fpconst;
10175 return true;
10178 if (speed_p)
10180 *cost = COSTS_N_INSNS (1);
10181 if (mode == DFmode)
10182 *cost += extra_cost->ldst.loadd;
10183 else
10184 *cost += extra_cost->ldst.loadf;
10186 else
10187 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10189 return true;
10191 *cost = COSTS_N_INSNS (4);
10192 return true;
10194 case CONST_VECTOR:
10195 /* Fixme. */
10196 if (TARGET_NEON
10197 && TARGET_HARD_FLOAT
10198 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10199 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10200 *cost = COSTS_N_INSNS (1);
10201 else
10202 *cost = COSTS_N_INSNS (4);
10203 return true;
10205 case HIGH:
10206 case LO_SUM:
10207 *cost = COSTS_N_INSNS (1);
10208 /* When optimizing for size, we prefer constant pool entries to
10209 MOVW/MOVT pairs, so bump the cost of these slightly. */
10210 if (!speed_p)
10211 *cost += 1;
10212 return true;
10214 case CLZ:
10215 *cost = COSTS_N_INSNS (1);
10216 if (speed_p)
10217 *cost += extra_cost->alu.clz;
10218 return false;
10220 case SMIN:
10221 if (XEXP (x, 1) == const0_rtx)
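/* SMIN (x, 0) can be implemented as AND Rd, Rn, Rn, ASR #31, i.e. a single
   logical operation with a shifted operand.  */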
10223 *cost = COSTS_N_INSNS (1);
10224 if (speed_p)
10225 *cost += extra_cost->alu.log_shift;
10226 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10227 return true;
10229 /* Fall through. */
10230 case SMAX:
10231 case UMIN:
10232 case UMAX:
10233 *cost = COSTS_N_INSNS (2);
10234 return false;
10236 case TRUNCATE:
10237 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10238 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10239 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10240 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10241 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10242 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10243 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10244 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10245 == ZERO_EXTEND))))
10247 *cost = COSTS_N_INSNS (1);
10248 if (speed_p)
10249 *cost += extra_cost->mult[1].extend;
10250 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10251 speed_p)
10252 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10253 0, speed_p));
10254 return true;
10256 *cost = LIBCALL_COST (1);
10257 return false;
10259 case UNSPEC:
10260 return arm_unspec_cost (x, outer_code, speed_p, cost);
10262 case PC:
10263 /* Reading the PC is like reading any other register. Writing it
10264 is more expensive, but we take that into account elsewhere. */
10265 *cost = 0;
10266 return true;
10268 case ZERO_EXTRACT:
10269 /* TODO: Simple zero_extract of bottom bits using AND. */
10270 /* Fall through. */
10271 case SIGN_EXTRACT:
10272 if (arm_arch6
10273 && mode == SImode
10274 && CONST_INT_P (XEXP (x, 1))
10275 && CONST_INT_P (XEXP (x, 2)))
10277 *cost = COSTS_N_INSNS (1);
10278 if (speed_p)
10279 *cost += extra_cost->alu.bfx;
10280 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10281 return true;
10283 /* Without UBFX/SBFX, need to resort to shift operations. */
10284 *cost = COSTS_N_INSNS (2);
10285 if (speed_p)
10286 *cost += 2 * extra_cost->alu.shift;
10287 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10288 return true;
10290 case FLOAT_EXTEND:
10291 if (TARGET_HARD_FLOAT)
10293 *cost = COSTS_N_INSNS (1);
10294 if (speed_p)
10295 *cost += extra_cost->fp[mode == DFmode].widen;
10296 if (!TARGET_FPU_ARMV8
10297 && GET_MODE (XEXP (x, 0)) == HFmode)
10299 /* Pre v8, widening HF->DF is a two-step process, first
10300 widening to SFmode. */
10301 *cost += COSTS_N_INSNS (1);
10302 if (speed_p)
10303 *cost += extra_cost->fp[0].widen;
10305 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10306 return true;
10309 *cost = LIBCALL_COST (1);
10310 return false;
10312 case FLOAT_TRUNCATE:
10313 if (TARGET_HARD_FLOAT)
10315 *cost = COSTS_N_INSNS (1);
10316 if (speed_p)
10317 *cost += extra_cost->fp[mode == DFmode].narrow;
10318 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10319 return true;
10320 /* Vector modes? */
10322 *cost = LIBCALL_COST (1);
10323 return false;
10325 case FIX:
10326 case UNSIGNED_FIX:
10327 if (TARGET_HARD_FLOAT)
10329 if (GET_MODE_CLASS (mode) == MODE_INT)
10331 *cost = COSTS_N_INSNS (1);
10332 if (speed_p)
10333 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10334 /* Strip off the 'cost' of rounding towards zero. */
10335 if (GET_CODE (XEXP (x, 0)) == FIX)
10336 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10337 else
10338 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10339 /* ??? Increase the cost to deal with transferring from
10340 FP -> CORE registers? */
10341 return true;
10343 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10344 && TARGET_FPU_ARMV8)
10346 *cost = COSTS_N_INSNS (1);
10347 if (speed_p)
10348 *cost += extra_cost->fp[mode == DFmode].roundint;
10349 return false;
10351 /* Vector costs? */
10353 *cost = LIBCALL_COST (1);
10354 return false;
10356 case FLOAT:
10357 case UNSIGNED_FLOAT:
10358 if (TARGET_HARD_FLOAT)
10360 /* ??? Increase the cost to deal with transferring from CORE
10361 -> FP registers? */
10362 *cost = COSTS_N_INSNS (1);
10363 if (speed_p)
10364 *cost += extra_cost->fp[mode == DFmode].fromint;
10365 return false;
10367 *cost = LIBCALL_COST (1);
10368 return false;
10370 case CALL:
10371 *cost = COSTS_N_INSNS (1);
10372 return true;
10374 case ASM_OPERANDS:
10375 /* Just a guess. Cost one insn per input. */
10376 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10377 return true;
10379 default:
10380 if (mode != VOIDmode)
10381 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10382 else
10383 *cost = COSTS_N_INSNS (4); /* Who knows? */
10384 return false;
10388 #undef HANDLE_NARROW_SHIFT_ARITH
10390 /* Implement TARGET_RTX_COSTS: dispatch between the legacy per-core cost functions and the new table-driven costs, for both size and speed. */
10391 static bool
10392 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10393 int *total, bool speed)
10395 bool result;
10397 if (TARGET_OLD_RTX_COSTS
10398 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10400 /* Old way. (Deprecated.) */
10401 if (!speed)
10402 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10403 (enum rtx_code) outer_code, total);
10404 else
10405 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10406 (enum rtx_code) outer_code, total,
10407 speed);
10409 else
10411 /* New way. */
10412 if (current_tune->insn_extra_cost)
10413 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10414 (enum rtx_code) outer_code,
10415 current_tune->insn_extra_cost,
10416 total, speed);
10417 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10418 && current_tune->insn_extra_cost == NULL */
10419 else
10420 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10421 (enum rtx_code) outer_code,
10422 &generic_extra_costs, total, speed);
10425 if (dump_file && (dump_flags & TDF_DETAILS))
10427 print_rtl_single (dump_file, x);
10428 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10429 *total, result ? "final" : "partial");
10431 return result;
10434 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10435 supported on any "slowmul" cores, so it can be ignored. */
10437 static bool
10438 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10439 int *total, bool speed)
10441 enum machine_mode mode = GET_MODE (x);
10443 if (TARGET_THUMB)
10445 *total = thumb1_rtx_costs (x, code, outer_code);
10446 return true;
10449 switch (code)
10451 case MULT:
10452 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10453 || mode == DImode)
10455 *total = COSTS_N_INSNS (20);
10456 return false;
10459 if (CONST_INT_P (XEXP (x, 1)))
10461 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10462 & (unsigned HOST_WIDE_INT) 0xffffffff);
10463 int cost, const_ok = const_ok_for_arm (i);
10464 int j, booth_unit_size;
10466 /* Tune as appropriate. */
10467 cost = const_ok ? 4 : 8;
10468 booth_unit_size = 2;
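/* The loop below charges one extra cycle for each booth_unit_size bits of
   the multiplier up to its most significant set bit: slow multipliers
   retire only two bits of the operand per cycle.  */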
10469 for (j = 0; i && j < 32; j += booth_unit_size)
10471 i >>= booth_unit_size;
10472 cost++;
10475 *total = COSTS_N_INSNS (cost);
10476 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10477 return true;
10480 *total = COSTS_N_INSNS (20);
10481 return false;
10483 default:
10484 return arm_rtx_costs_1 (x, outer_code, total, speed);
10489 /* RTX cost for cores with a fast multiply unit (M variants). */
10491 static bool
10492 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10493 int *total, bool speed)
10495 enum machine_mode mode = GET_MODE (x);
10497 if (TARGET_THUMB1)
10499 *total = thumb1_rtx_costs (x, code, outer_code);
10500 return true;
10503 /* ??? should thumb2 use different costs? */
10504 switch (code)
10506 case MULT:
10507 /* There is no point basing this on the tuning, since it is always the
10508 fast variant if it exists at all. */
10509 if (mode == DImode
10510 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10511 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10512 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10514 *total = COSTS_N_INSNS (2);
10515 return false;
10519 if (mode == DImode)
10521 *total = COSTS_N_INSNS (5);
10522 return false;
10525 if (CONST_INT_P (XEXP (x, 1)))
10527 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10528 & (unsigned HOST_WIDE_INT) 0xffffffff);
10529 int cost, const_ok = const_ok_for_arm (i);
10530 int j, booth_unit_size;
10532 /* Tune as appropriate. */
10533 cost = const_ok ? 4 : 8;
10534 booth_unit_size = 8;
10535 for (j = 0; i && j < 32; j += booth_unit_size)
10537 i >>= booth_unit_size;
10538 cost++;
10541 *total = COSTS_N_INSNS (cost);
10542 return false;
10545 if (mode == SImode)
10547 *total = COSTS_N_INSNS (4);
10548 return false;
10551 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10553 if (TARGET_HARD_FLOAT
10554 && (mode == SFmode
10555 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10557 *total = COSTS_N_INSNS (1);
10558 return false;
10562 /* Requires a lib call */
10563 *total = COSTS_N_INSNS (20);
10564 return false;
10566 default:
10567 return arm_rtx_costs_1 (x, outer_code, total, speed);
10572 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10573 so it can be ignored. */
10575 static bool
10576 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10577 int *total, bool speed)
10579 enum machine_mode mode = GET_MODE (x);
10581 if (TARGET_THUMB)
10583 *total = thumb1_rtx_costs (x, code, outer_code);
10584 return true;
10587 switch (code)
10589 case COMPARE:
10590 if (GET_CODE (XEXP (x, 0)) != MULT)
10591 return arm_rtx_costs_1 (x, outer_code, total, speed);
10593 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10594 will stall until the multiplication is complete. */
10595 *total = COSTS_N_INSNS (3);
10596 return false;
10598 case MULT:
10599 /* There is no point basing this on the tuning, since it is always the
10600 fast variant if it exists at all. */
10601 if (mode == DImode
10602 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10603 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10604 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10606 *total = COSTS_N_INSNS (2);
10607 return false;
10611 if (mode == DImode)
10613 *total = COSTS_N_INSNS (5);
10614 return false;
10617 if (CONST_INT_P (XEXP (x, 1)))
10619 /* If operand 1 is a constant we can more accurately
10620 calculate the cost of the multiply. The multiplier can
10621 retire 15 bits on the first cycle and a further 12 on the
10622 second. We do, of course, have to load the constant into
10623 a register first. */
10624 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10625 /* There's a general overhead of one cycle. */
10626 int cost = 1;
10627 unsigned HOST_WIDE_INT masked_const;
10629 if (i & 0x80000000)
10630 i = ~i;
10632 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
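/* Significant bits beyond the low 15 need a second cycle, and bits beyond
   the low 27 (15 + 12) need a third.  */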
10634 masked_const = i & 0xffff8000;
10635 if (masked_const != 0)
10637 cost++;
10638 masked_const = i & 0xf8000000;
10639 if (masked_const != 0)
10640 cost++;
10642 *total = COSTS_N_INSNS (cost);
10643 return false;
10646 if (mode == SImode)
10648 *total = COSTS_N_INSNS (3);
10649 return false;
10652 /* Requires a lib call */
10653 *total = COSTS_N_INSNS (20);
10654 return false;
10656 default:
10657 return arm_rtx_costs_1 (x, outer_code, total, speed);
10662 /* RTX costs for 9e (and later) cores. */
10664 static bool
10665 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10666 int *total, bool speed)
10668 enum machine_mode mode = GET_MODE (x);
10670 if (TARGET_THUMB1)
10672 switch (code)
10674 case MULT:
10675 *total = COSTS_N_INSNS (3);
10676 return true;
10678 default:
10679 *total = thumb1_rtx_costs (x, code, outer_code);
10680 return true;
10684 switch (code)
10686 case MULT:
10687 /* There is no point basing this on the tuning, since it is always the
10688 fast variant if it exists at all. */
10689 if (mode == DImode
10690 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10691 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10692 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10694 *total = COSTS_N_INSNS (2);
10695 return false;
10699 if (mode == DImode)
10701 *total = COSTS_N_INSNS (5);
10702 return false;
10705 if (mode == SImode)
10707 *total = COSTS_N_INSNS (2);
10708 return false;
10711 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10713 if (TARGET_HARD_FLOAT
10714 && (mode == SFmode
10715 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10717 *total = COSTS_N_INSNS (1);
10718 return false;
10722 *total = COSTS_N_INSNS (20);
10723 return false;
10725 default:
10726 return arm_rtx_costs_1 (x, outer_code, total, speed);
10729 /* All address computations that can be done are free, but rtx cost returns
10730 the same for practically all of them. So we weight the different types
10731 of address here in the order (most pref first):
10732 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
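/* The weights returned below are: auto-increment/decrement addresses 0,
   register plus constant 2, register plus a shifted or otherwise computed
   index 3, register plus register 4, anything else (e.g. a plain register)
   6, and a memory-indirect or symbolic address 10.  */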
10733 static inline int
10734 arm_arm_address_cost (rtx x)
10736 enum rtx_code c = GET_CODE (x);
10738 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10739 return 0;
10740 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10741 return 10;
10743 if (c == PLUS)
10745 if (CONST_INT_P (XEXP (x, 1)))
10746 return 2;
10748 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10749 return 3;
10751 return 4;
10754 return 6;
10757 static inline int
10758 arm_thumb_address_cost (rtx x)
10760 enum rtx_code c = GET_CODE (x);
10762 if (c == REG)
10763 return 1;
10764 if (c == PLUS
10765 && REG_P (XEXP (x, 0))
10766 && CONST_INT_P (XEXP (x, 1)))
10767 return 1;
10769 return 2;
10772 static int
10773 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
10774 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10776 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10779 /* Adjust cost hook for XScale. */
10780 static bool
10781 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10783 /* Some true dependencies can have a higher cost depending
10784 on precisely how certain input operands are used. */
10785 if (REG_NOTE_KIND(link) == 0
10786 && recog_memoized (insn) >= 0
10787 && recog_memoized (dep) >= 0)
10789 int shift_opnum = get_attr_shift (insn);
10790 enum attr_type attr_type = get_attr_type (dep);
10792 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10793 operand for INSN. If we have a shifted input operand and the
10794 instruction we depend on is another ALU instruction, then we may
10795 have to account for an additional stall. */
10796 if (shift_opnum != 0
10797 && (attr_type == TYPE_ALU_SHIFT_IMM
10798 || attr_type == TYPE_ALUS_SHIFT_IMM
10799 || attr_type == TYPE_LOGIC_SHIFT_IMM
10800 || attr_type == TYPE_LOGICS_SHIFT_IMM
10801 || attr_type == TYPE_ALU_SHIFT_REG
10802 || attr_type == TYPE_ALUS_SHIFT_REG
10803 || attr_type == TYPE_LOGIC_SHIFT_REG
10804 || attr_type == TYPE_LOGICS_SHIFT_REG
10805 || attr_type == TYPE_MOV_SHIFT
10806 || attr_type == TYPE_MVN_SHIFT
10807 || attr_type == TYPE_MOV_SHIFT_REG
10808 || attr_type == TYPE_MVN_SHIFT_REG))
10810 rtx shifted_operand;
10811 int opno;
10813 /* Get the shifted operand. */
10814 extract_insn (insn);
10815 shifted_operand = recog_data.operand[shift_opnum];
10817 /* Iterate over all the operands in DEP. If we write an operand
10818 that overlaps with SHIFTED_OPERAND, then we have increase the
10819 cost of this dependency. */
10820 extract_insn (dep);
10821 preprocess_constraints ();
10822 for (opno = 0; opno < recog_data.n_operands; opno++)
10824 /* We can ignore strict inputs. */
10825 if (recog_data.operand_type[opno] == OP_IN)
10826 continue;
10828 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10829 shifted_operand))
10831 *cost = 2;
10832 return false;
10837 return true;
10840 /* Adjust cost hook for Cortex A9. */
10841 static bool
10842 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10844 switch (REG_NOTE_KIND (link))
10846 case REG_DEP_ANTI:
10847 *cost = 0;
10848 return false;
10850 case REG_DEP_TRUE:
10851 case REG_DEP_OUTPUT:
10852 if (recog_memoized (insn) >= 0
10853 && recog_memoized (dep) >= 0)
10855 if (GET_CODE (PATTERN (insn)) == SET)
10857 if (GET_MODE_CLASS
10858 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10859 || GET_MODE_CLASS
10860 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10862 enum attr_type attr_type_insn = get_attr_type (insn);
10863 enum attr_type attr_type_dep = get_attr_type (dep);
10865 /* By default all dependencies of the form
10866 s0 = s0 <op> s1
10867 s0 = s0 <op> s2
10868 have an extra latency of 1 cycle because
10869 of the input and output dependency in this
10870 case. However, this gets modeled as a true
10871 dependency and hence all these checks. */
10872 if (REG_P (SET_DEST (PATTERN (insn)))
10873 && REG_P (SET_DEST (PATTERN (dep)))
10874 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
10875 SET_DEST (PATTERN (dep))))
10877 /* FMACS is a special case where the dependent
10878 instruction can be issued 3 cycles before
10879 the normal latency in case of an output
10880 dependency. */
10881 if ((attr_type_insn == TYPE_FMACS
10882 || attr_type_insn == TYPE_FMACD)
10883 && (attr_type_dep == TYPE_FMACS
10884 || attr_type_dep == TYPE_FMACD))
10886 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
10887 *cost = insn_default_latency (dep) - 3;
10888 else
10889 *cost = insn_default_latency (dep);
10890 return false;
10892 else
10894 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
10895 *cost = insn_default_latency (dep) + 1;
10896 else
10897 *cost = insn_default_latency (dep);
10899 return false;
10904 break;
10906 default:
10907 gcc_unreachable ();
10910 return true;
10913 /* Adjust cost hook for FA726TE. */
10914 static bool
10915 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
10917 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
10918 followed by a predicated one) has a penalty of 3. */
10919 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
10920 && recog_memoized (insn) >= 0
10921 && recog_memoized (dep) >= 0
10922 && get_attr_conds (dep) == CONDS_SET)
10924 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10925 if (get_attr_conds (insn) == CONDS_USE
10926 && get_attr_type (insn) != TYPE_BRANCH)
10928 *cost = 3;
10929 return false;
10932 if (GET_CODE (PATTERN (insn)) == COND_EXEC
10933 || get_attr_conds (insn) == CONDS_USE)
10935 *cost = 0;
10936 return false;
10940 return true;
10943 /* Implement TARGET_REGISTER_MOVE_COST.
10945 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10946 it is typically more expensive than a single memory access. We set
10947 the cost to less than two memory accesses so that floating
10948 point to integer conversion does not go through memory. */
10951 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10952 reg_class_t from, reg_class_t to)
10954 if (TARGET_32BIT)
10956 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
10957 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
10958 return 15;
10959 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
10960 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
10961 return 4;
10962 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
10963 return 20;
10964 else
10965 return 2;
10967 else
10969 if (from == HI_REGS || to == HI_REGS)
10970 return 4;
10971 else
10972 return 2;
10976 /* Implement TARGET_MEMORY_MOVE_COST. */
10979 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
10980 bool in ATTRIBUTE_UNUSED)
10982 if (TARGET_32BIT)
10983 return 10;
10984 else
10986 if (GET_MODE_SIZE (mode) < 4)
10987 return 8;
10988 else
10989 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
10993 /* Vectorizer cost model implementation. */
10995 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10996 static int
10997 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10998 tree vectype,
10999 int misalign ATTRIBUTE_UNUSED)
11001 unsigned elements;
11003 switch (type_of_cost)
11005 case scalar_stmt:
11006 return current_tune->vec_costs->scalar_stmt_cost;
11008 case scalar_load:
11009 return current_tune->vec_costs->scalar_load_cost;
11011 case scalar_store:
11012 return current_tune->vec_costs->scalar_store_cost;
11014 case vector_stmt:
11015 return current_tune->vec_costs->vec_stmt_cost;
11017 case vector_load:
11018 return current_tune->vec_costs->vec_align_load_cost;
11020 case vector_store:
11021 return current_tune->vec_costs->vec_store_cost;
11023 case vec_to_scalar:
11024 return current_tune->vec_costs->vec_to_scalar_cost;
11026 case scalar_to_vec:
11027 return current_tune->vec_costs->scalar_to_vec_cost;
11029 case unaligned_load:
11030 return current_tune->vec_costs->vec_unalign_load_cost;
11032 case unaligned_store:
11033 return current_tune->vec_costs->vec_unalign_store_cost;
11035 case cond_branch_taken:
11036 return current_tune->vec_costs->cond_taken_branch_cost;
11038 case cond_branch_not_taken:
11039 return current_tune->vec_costs->cond_not_taken_branch_cost;
11041 case vec_perm:
11042 case vec_promote_demote:
11043 return current_tune->vec_costs->vec_stmt_cost;
11045 case vec_construct:
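/* Building a vector from scalars: estimated at one statement per pair of
   elements, plus one.  */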
11046 elements = TYPE_VECTOR_SUBPARTS (vectype);
11047 return elements / 2 + 1;
11049 default:
11050 gcc_unreachable ();
11054 /* Implement targetm.vectorize.add_stmt_cost. */
11056 static unsigned
11057 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11058 struct _stmt_vec_info *stmt_info, int misalign,
11059 enum vect_cost_model_location where)
11061 unsigned *cost = (unsigned *) data;
11062 unsigned retval = 0;
11064 if (flag_vect_cost_model)
11066 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11067 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11069 /* Statements in an inner loop relative to the loop being
11070 vectorized are weighted more heavily. The value here is
11071 arbitrary and could potentially be improved with analysis. */
11072 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11073 count *= 50; /* FIXME. */
11075 retval = (unsigned) (count * stmt_cost);
11076 cost[where] += retval;
11079 return retval;
11082 /* Return true if and only if this insn can dual-issue only as older. */
11083 static bool
11084 cortexa7_older_only (rtx insn)
11086 if (recog_memoized (insn) < 0)
11087 return false;
11089 switch (get_attr_type (insn))
11091 case TYPE_ALU_REG:
11092 case TYPE_ALUS_REG:
11093 case TYPE_LOGIC_REG:
11094 case TYPE_LOGICS_REG:
11095 case TYPE_ADC_REG:
11096 case TYPE_ADCS_REG:
11097 case TYPE_ADR:
11098 case TYPE_BFM:
11099 case TYPE_REV:
11100 case TYPE_MVN_REG:
11101 case TYPE_SHIFT_IMM:
11102 case TYPE_SHIFT_REG:
11103 case TYPE_LOAD_BYTE:
11104 case TYPE_LOAD1:
11105 case TYPE_STORE1:
11106 case TYPE_FFARITHS:
11107 case TYPE_FADDS:
11108 case TYPE_FFARITHD:
11109 case TYPE_FADDD:
11110 case TYPE_FMOV:
11111 case TYPE_F_CVT:
11112 case TYPE_FCMPS:
11113 case TYPE_FCMPD:
11114 case TYPE_FCONSTS:
11115 case TYPE_FCONSTD:
11116 case TYPE_FMULS:
11117 case TYPE_FMACS:
11118 case TYPE_FMULD:
11119 case TYPE_FMACD:
11120 case TYPE_FDIVS:
11121 case TYPE_FDIVD:
11122 case TYPE_F_MRC:
11123 case TYPE_F_MRRC:
11124 case TYPE_F_FLAG:
11125 case TYPE_F_LOADS:
11126 case TYPE_F_STORES:
11127 return true;
11128 default:
11129 return false;
11133 /* Return true if and only if this insn can dual-issue as younger. */
11134 static bool
11135 cortexa7_younger (FILE *file, int verbose, rtx insn)
11137 if (recog_memoized (insn) < 0)
11139 if (verbose > 5)
11140 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11141 return false;
11144 switch (get_attr_type (insn))
11146 case TYPE_ALU_IMM:
11147 case TYPE_ALUS_IMM:
11148 case TYPE_LOGIC_IMM:
11149 case TYPE_LOGICS_IMM:
11150 case TYPE_EXTEND:
11151 case TYPE_MVN_IMM:
11152 case TYPE_MOV_IMM:
11153 case TYPE_MOV_REG:
11154 case TYPE_MOV_SHIFT:
11155 case TYPE_MOV_SHIFT_REG:
11156 case TYPE_BRANCH:
11157 case TYPE_CALL:
11158 return true;
11159 default:
11160 return false;
11165 /* Look for an instruction that can dual issue only as an older
11166 instruction, and move it in front of any instructions that can
11167 dual-issue as younger, while preserving the relative order of all
11168 other instructions in the ready list. This is a heuristic to help
11169 dual-issue in later cycles, by postponing issue of more flexible
11170 instructions. This heuristic may affect dual issue opportunities
11171 in the current cycle. */
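/* For example (illustrative): if the ready list holds a MOV rd, #imm due to
   issue before a VFP add, the VFP add (older-only) is hoisted in front of
   the MOV (younger-capable), keeping the MOV free to pair as the younger
   insn of a later dual-issue slot.  */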
11172 static void
11173 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11174 int clock)
11176 int i;
11177 int first_older_only = -1, first_younger = -1;
11179 if (verbose > 5)
11180 fprintf (file,
11181 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11182 clock,
11183 *n_readyp);
11185 /* Traverse the ready list from the head (the instruction to issue
11186 first), looking for the first instruction that can issue as
11187 younger and the first instruction that can dual-issue only as
11188 older. */
11189 for (i = *n_readyp - 1; i >= 0; i--)
11191 rtx insn = ready[i];
11192 if (cortexa7_older_only (insn))
11194 first_older_only = i;
11195 if (verbose > 5)
11196 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11197 break;
11199 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11200 first_younger = i;
11203 /* Nothing to reorder because either no younger insn was found, or an
11204 insn that can dual-issue only as older already appears before any
11205 insn that can dual-issue as younger. */
11206 if (first_younger == -1)
11208 if (verbose > 5)
11209 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11210 return;
11213 /* Nothing to reorder because no older-only insn in the ready list. */
11214 if (first_older_only == -1)
11216 if (verbose > 5)
11217 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11218 return;
11221 /* Move first_older_only insn before first_younger. */
11222 if (verbose > 5)
11223 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11224 INSN_UID (ready[first_older_only]),
11225 INSN_UID (ready[first_younger]));
11226 rtx first_older_only_insn = ready[first_older_only];
11227 for (i = first_older_only; i < first_younger; i++)
11229 ready[i] = ready[i+1];
11232 ready[i] = first_older_only_insn;
11233 return;
11236 /* Implement TARGET_SCHED_REORDER. */
11237 static int
11238 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11239 int clock)
11241 switch (arm_tune)
11243 case cortexa7:
11244 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11245 break;
11246 default:
11247 /* Do nothing for other cores. */
11248 break;
11251 return arm_issue_rate ();
11254 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11255 It corrects the value of COST based on the relationship between
11256 INSN and DEP through the dependence LINK. It returns the new
11257 value. There is a per-core adjust_cost hook to adjust scheduler costs
11258 and the per-core hook can choose to completely override the generic
11259 adjust_cost function. Only put bits of code into arm_adjust_cost that
11260 are common across all cores. */
11261 static int
11262 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11264 rtx i_pat, d_pat;
11266 /* When generating Thumb-1 code, we want to place flag-setting operations
11267 close to a conditional branch which depends on them, so that we can
11268 omit the comparison. */
11269 if (TARGET_THUMB1
11270 && REG_NOTE_KIND (link) == 0
11271 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11272 && recog_memoized (dep) >= 0
11273 && get_attr_conds (dep) == CONDS_SET)
11274 return 0;
11276 if (current_tune->sched_adjust_cost != NULL)
11278 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11279 return cost;
11282 /* XXX Is this strictly true? */
11283 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11284 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11285 return 0;
11287 /* Call insns don't incur a stall, even if they follow a load. */
11288 if (REG_NOTE_KIND (link) == 0
11289 && CALL_P (insn))
11290 return 1;
11292 if ((i_pat = single_set (insn)) != NULL
11293 && MEM_P (SET_SRC (i_pat))
11294 && (d_pat = single_set (dep)) != NULL
11295 && MEM_P (SET_DEST (d_pat)))
11297 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11298 /* This is a load after a store; there is no conflict if the load reads
11299 from a cached area. Assume that loads from the stack, and from the
11300 constant pool are cached, and that others will miss. This is a
11301 hack. */
11303 if ((GET_CODE (src_mem) == SYMBOL_REF
11304 && CONSTANT_POOL_ADDRESS_P (src_mem))
11305 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11306 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11307 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11308 return 1;
11311 return cost;
11315 arm_max_conditional_execute (void)
11317 return max_insns_skipped;
11320 static int
11321 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11323 if (TARGET_32BIT)
11324 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11325 else
11326 return (optimize > 0) ? 2 : 0;
11329 static int
11330 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11332 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11335 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11336 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11337 sequences of non-executed instructions in IT blocks probably take the same
11338 amount of time as executed instructions (and the IT instruction itself takes
11339 space in icache). This function was experimentally determined to give good
11340 results on a popular embedded benchmark. */
11342 static int
11343 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11345 return (TARGET_32BIT && speed_p) ? 1
11346 : arm_default_branch_cost (speed_p, predictable_p);
11349 static bool fp_consts_inited = false;
11351 static REAL_VALUE_TYPE value_fp0;
11353 static void
11354 init_fp_table (void)
11356 REAL_VALUE_TYPE r;
11358 r = REAL_VALUE_ATOF ("0", DFmode);
11359 value_fp0 = r;
11360 fp_consts_inited = true;
11363 /* Return TRUE if rtx X is a valid immediate FP constant. */
11365 arm_const_double_rtx (rtx x)
11367 REAL_VALUE_TYPE r;
11369 if (!fp_consts_inited)
11370 init_fp_table ();
11372 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11373 if (REAL_VALUE_MINUS_ZERO (r))
11374 return 0;
11376 if (REAL_VALUES_EQUAL (r, value_fp0))
11377 return 1;
11379 return 0;
11382 /* VFPv3 has a fairly wide range of representable immediates, formed from
11383 "quarter-precision" floating-point values. These can be evaluated using this
11384 formula (with ^ for exponentiation):
11386 -1^s * n * 2^-r
11388 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11389 16 <= n <= 31 and 0 <= r <= 7.
11391 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11393 - A (most-significant) is the sign bit.
11394 - BCD are the exponent (encoded as r XOR 3).
11395 - EFGH are the mantissa (encoded as n - 16).
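   For example, 1.0 = +1 * 16 * 2^-4, i.e. s = 0, n = 16 and r = 4, which
   encodes as 0 111 0000 (0x70).  */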
11398 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11399 fconst[sd] instruction, or -1 if X isn't suitable. */
11400 static int
11401 vfp3_const_double_index (rtx x)
11403 REAL_VALUE_TYPE r, m;
11404 int sign, exponent;
11405 unsigned HOST_WIDE_INT mantissa, mant_hi;
11406 unsigned HOST_WIDE_INT mask;
11407 HOST_WIDE_INT m1, m2;
11408 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11410 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11411 return -1;
11413 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11415 /* We can't represent these things, so detect them first. */
11416 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11417 return -1;
11419 /* Extract sign, exponent and mantissa. */
11420 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11421 r = real_value_abs (&r);
11422 exponent = REAL_EXP (&r);
11423 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11424 highest (sign) bit, with a fixed binary point at bit point_pos.
11425 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11426 bits for the mantissa, this may fail (low bits would be lost). */
11427 real_ldexp (&m, &r, point_pos - exponent);
11428 REAL_VALUE_TO_INT (&m1, &m2, m);
11429 mantissa = m1;
11430 mant_hi = m2;
11432 /* If there are bits set in the low part of the mantissa, we can't
11433 represent this value. */
11434 if (mantissa != 0)
11435 return -1;
11437 /* Now make it so that mantissa contains the most-significant bits, and move
11438 the point_pos to indicate that the least-significant bits have been
11439 discarded. */
11440 point_pos -= HOST_BITS_PER_WIDE_INT;
11441 mantissa = mant_hi;
11443 /* We can permit four significant bits of mantissa only, plus a high bit
11444 which is always 1. */
11445 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11446 if ((mantissa & mask) != 0)
11447 return -1;
11449 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11450 mantissa >>= point_pos - 5;
11452 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11453 floating-point immediate zero with Neon using an integer-zero load, but
11454 that case is handled elsewhere.) */
11455 if (mantissa == 0)
11456 return -1;
11458 gcc_assert (mantissa >= 16 && mantissa <= 31);
11460 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11461 normalized significands are in the range [1, 2). (Our mantissa is shifted
11462 left 4 places at this point relative to normalized IEEE754 values). GCC
11463 internally uses [0.5, 1) (see real.c), so the exponent returned from
11464 REAL_EXP must be altered. */
11465 exponent = 5 - exponent;
11467 if (exponent < 0 || exponent > 7)
11468 return -1;
11470 /* Sign, mantissa and exponent are now in the correct form to plug into the
11471 formula described in the comment above. */
11472 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11475 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11477 vfp3_const_double_rtx (rtx x)
11479 if (!TARGET_VFP3)
11480 return 0;
11482 return vfp3_const_double_index (x) != -1;
11485 /* Recognize immediates which can be used in various Neon instructions. Legal
11486 immediates are described by the following table (for VMVN variants, the
11487 bitwise inverse of the constant shown is recognized. In either case, VMOV
11488 is output and the correct instruction to use for a given constant is chosen
11489 by the assembler). The constant shown is replicated across all elements of
11490 the destination vector.
11492 insn elems variant constant (binary)
11493 ---- ----- ------- -----------------
11494 vmov i32 0 00000000 00000000 00000000 abcdefgh
11495 vmov i32 1 00000000 00000000 abcdefgh 00000000
11496 vmov i32 2 00000000 abcdefgh 00000000 00000000
11497 vmov i32 3 abcdefgh 00000000 00000000 00000000
11498 vmov i16 4 00000000 abcdefgh
11499 vmov i16 5 abcdefgh 00000000
11500 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11501 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11502 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11503 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11504 vmvn i16 10 00000000 abcdefgh
11505 vmvn i16 11 abcdefgh 00000000
11506 vmov i32 12 00000000 00000000 abcdefgh 11111111
11507 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11508 vmov i32 14 00000000 abcdefgh 11111111 11111111
11509 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11510 vmov i8 16 abcdefgh
11511 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11512 eeeeeeee ffffffff gggggggg hhhhhhhh
11513 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11514 vmov f32 19 00000000 00000000 00000000 00000000
11516 For case 18, B = !b. Representable values are exactly those accepted by
11517 vfp3_const_double_index, but are output as floating-point numbers rather
11518 than indices.
11520 For case 19, we will change it to vmov.i32 when assembling.
11522 Variants 0-5 (inclusive) may also be used as immediates for the second
11523 operand of VORR/VBIC instructions.
11525 The INVERSE argument causes the bitwise inverse of the given operand to be
11526 recognized instead (used for recognizing legal immediates for the VAND/VORN
11527 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11528 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11529 output, rather than the real insns vbic/vorr).
11531 INVERSE makes no difference to the recognition of float vectors.
11533 The return value is the variant of immediate as shown in the above table, or
11534 -1 if the given value doesn't match any of the listed patterns.
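   For example (illustrative): a vector whose 32-bit elements all equal
   0x0000ab00 matches variant 1 (vmov.i32), while elements all equal to
   0xffffff54 match variant 6 (vmvn.i32, since ~0xffffff54 == 0x000000ab);
   in both cases the element width reported is 32.  */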
11536 static int
11537 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11538 rtx *modconst, int *elementwidth)
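/* Try one candidate encoding: check every byte of the splatted constant
   against TEST at the given STRIDE; on a match, record the variant CLASS
   and element size ELSIZE and stop searching.  */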
11540 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11541 matches = 1; \
11542 for (i = 0; i < idx; i += (STRIDE)) \
11543 if (!(TEST)) \
11544 matches = 0; \
11545 if (matches) \
11547 immtype = (CLASS); \
11548 elsize = (ELSIZE); \
11549 break; \
11552 unsigned int i, elsize = 0, idx = 0, n_elts;
11553 unsigned int innersize;
11554 unsigned char bytes[16];
11555 int immtype = -1, matches;
11556 unsigned int invmask = inverse ? 0xff : 0;
11557 bool vector = GET_CODE (op) == CONST_VECTOR;
11559 if (vector)
11561 n_elts = CONST_VECTOR_NUNITS (op);
11562 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11564 else
11566 n_elts = 1;
11567 if (mode == VOIDmode)
11568 mode = DImode;
11569 innersize = GET_MODE_SIZE (mode);
11572 /* Vectors of float constants. */
11573 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11575 rtx el0 = CONST_VECTOR_ELT (op, 0);
11576 REAL_VALUE_TYPE r0;
11578 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11579 return -1;
11581 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11583 for (i = 1; i < n_elts; i++)
11585 rtx elt = CONST_VECTOR_ELT (op, i);
11586 REAL_VALUE_TYPE re;
11588 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11590 if (!REAL_VALUES_EQUAL (r0, re))
11591 return -1;
11594 if (modconst)
11595 *modconst = CONST_VECTOR_ELT (op, 0);
11597 if (elementwidth)
11598 *elementwidth = 0;
11600 if (el0 == CONST0_RTX (GET_MODE (el0)))
11601 return 19;
11602 else
11603 return 18;
11606 /* Splat vector constant out into a byte vector. */
11607 for (i = 0; i < n_elts; i++)
11609 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11610 unsigned HOST_WIDE_INT elpart;
11611 unsigned int part, parts;
11613 if (CONST_INT_P (el))
11615 elpart = INTVAL (el);
11616 parts = 1;
11618 else if (CONST_DOUBLE_P (el))
11620 elpart = CONST_DOUBLE_LOW (el);
11621 parts = 2;
11623 else
11624 gcc_unreachable ();
11626 for (part = 0; part < parts; part++)
11628 unsigned int byte;
11629 for (byte = 0; byte < innersize; byte++)
11631 bytes[idx++] = (elpart & 0xff) ^ invmask;
11632 elpart >>= BITS_PER_UNIT;
11634 if (CONST_DOUBLE_P (el))
11635 elpart = CONST_DOUBLE_HIGH (el);
11639 /* Sanity check. */
11640 gcc_assert (idx == GET_MODE_SIZE (mode));
11644 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11645 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11647 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11648 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11650 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11651 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11653 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11654 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11656 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11658 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11660 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11661 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11663 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11664 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11666 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11667 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11669 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11670 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11672 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11674 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11676 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11677 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11679 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11680 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11682 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11683 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11685 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11686 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11688 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11690 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11691 && bytes[i] == bytes[(i + 8) % idx]);
11693 while (0);
11695 if (immtype == -1)
11696 return -1;
11698 if (elementwidth)
11699 *elementwidth = elsize;
11701 if (modconst)
11703 unsigned HOST_WIDE_INT imm = 0;
11705 /* Un-invert bytes of recognized vector, if necessary. */
11706 if (invmask != 0)
11707 for (i = 0; i < idx; i++)
11708 bytes[i] ^= invmask;
11710 if (immtype == 17)
11712 /* FIXME: Broken on 32-bit H_W_I hosts. */
11713 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11715 for (i = 0; i < 8; i++)
11716 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11717 << (i * BITS_PER_UNIT);
11719 *modconst = GEN_INT (imm);
11721 else
11723 unsigned HOST_WIDE_INT imm = 0;
11725 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11726 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11728 *modconst = GEN_INT (imm);
11732 return immtype;
11733 #undef CHECK
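/* Worked example (illustrative only): for a V2SImode CONST_VECTOR whose
   two elements are both 0x000000AB, the splat loop above produces

     bytes[] = { 0xAB, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00 };

   which satisfies the first CHECK (4, 32, 0, ...) test, so the function
   returns immtype 0 with *ELEMENTWIDTH = 32 and *MODCONST = 0xAB --
   emittable as something like "vmov.i32 d0, #0xab" (register chosen
   arbitrarily here).  A constant of 0x0000AB00 in every element would
   instead match class 1, since the non-zero byte sits in the second
   byte position of each 32-bit element.  */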
11736 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
11737 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11738 float elements), and a modified constant (whatever should be output for a
11739 VMOV) in *MODCONST. */
11742 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
11743 rtx *modconst, int *elementwidth)
11745 rtx tmpconst;
11746 int tmpwidth;
11747 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11749 if (retval == -1)
11750 return 0;
11752 if (modconst)
11753 *modconst = tmpconst;
11755 if (elementwidth)
11756 *elementwidth = tmpwidth;
11758 return 1;
11761 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
11762 the immediate is valid, write a constant suitable for using as an operand
11763 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11764 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11767 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
11768 rtx *modconst, int *elementwidth)
11770 rtx tmpconst;
11771 int tmpwidth;
11772 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11774 if (retval < 0 || retval > 5)
11775 return 0;
11777 if (modconst)
11778 *modconst = tmpconst;
11780 if (elementwidth)
11781 *elementwidth = tmpwidth;
11783 return 1;
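/* Illustrative use: a V4SImode constant with every element 0x00FF0000
   splats to the per-element byte pattern {0x00, 0x00, 0xFF, 0x00},
   which is class 2 in the table above, so it is accepted here (2 <= 5)
   and can be emitted as an immediate form of VORR/VBIC.  By contrast
   0x0000FFFF first matches class 12 (a 0xFF-filled variant), which lies
   outside the 0..5 range, so this function rejects it even though it
   is a valid VMOV immediate.  */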
11786 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11787 the immediate is valid, write a constant suitable for using as an operand
11788 to VSHR/VSHL to *MODCONST and the corresponding element width to
11789 *ELEMENTWIDTH.  ISLEFTSHIFT is true for a left shift and false for a right
11790 shift; the two directions have different immediate limitations.  */
11793 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
11794 rtx *modconst, int *elementwidth,
11795 bool isleftshift)
11797 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11798 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11799 unsigned HOST_WIDE_INT last_elt = 0;
11800 unsigned HOST_WIDE_INT maxshift;
11802 /* All elements of the shift-count vector must be the same CONST_INT.  */
11803 for (i = 0; i < n_elts; i++)
11805 rtx el = CONST_VECTOR_ELT (op, i);
11806 unsigned HOST_WIDE_INT elpart;
11808 if (CONST_INT_P (el))
11809 elpart = INTVAL (el);
11810 else if (CONST_DOUBLE_P (el))
11811 return 0;
11812 else
11813 gcc_unreachable ();
11815 if (i != 0 && elpart != last_elt)
11816 return 0;
11818 last_elt = elpart;
11821 /* Shift less than element size. */
11822 maxshift = innersize * 8;
11824 if (isleftshift)
11826 /* Left shift immediate value can be from 0 to <size>-1. */
11827 if (last_elt >= maxshift)
11828 return 0;
11830 else
11832 /* Right shift immediate value can be from 1 to <size>. */
11833 if (last_elt == 0 || last_elt > maxshift)
11834 return 0;
11837 if (elementwidth)
11838 *elementwidth = innersize * 8;
11840 if (modconst)
11841 *modconst = CONST_VECTOR_ELT (op, 0);
11843 return 1;
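/* Example (illustrative): a V8HImode CONST_VECTOR with every element
   equal to 3 is accepted in both directions -- maxshift is 16, so a
   VSHL count must lie in [0, 15] and a VSHR count in [1, 16] -- and
   *ELEMENTWIDTH is set to 16.  A vector whose elements differ, or a
   left-shift count of 16, is rejected.  */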
11846 /* Return a string suitable for output of Neon immediate logic operation
11847 MNEM. */
11849 char *
11850 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
11851 int inverse, int quad)
11853 int width, is_valid;
11854 static char templ[40];
11856 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11858 gcc_assert (is_valid != 0);
11860 if (quad)
11861 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11862 else
11863 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11865 return templ;
11868 /* Return a string suitable for output of Neon immediate shift operation
11869 (VSHR or VSHL) MNEM. */
11871 char *
11872 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11873 enum machine_mode mode, int quad,
11874 bool isleftshift)
11876 int width, is_valid;
11877 static char templ[40];
11879 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11880 gcc_assert (is_valid != 0);
11882 if (quad)
11883 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11884 else
11885 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11887 return templ;
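/* For instance (operand numbers as used by the corresponding insn
   patterns), the two helpers above produce assembler templates along
   the lines of "vorr.i32\t%q0, %2" and "vshr.s16\t%P0, %P1, %2"; the
   %q and %P operand modifiers print the quad- or double-register name
   of the operand.  */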
11890 /* Output a sequence of pairwise operations to implement a reduction.
11891 NOTE: We do "too much work" here, because pairwise operations work on two
11892 registers-worth of operands in one go. Unfortunately we can't exploit those
11893 extra calculations to do the full operation in fewer steps, as far as we can tell.
11894 Although all vector elements of the result but the first are ignored, we
11895 actually calculate the same result in each of the elements. An alternative
11896 such as initially loading a vector with zero to use as each of the second
11897 operands would use up an additional register and take an extra instruction,
11898 for no particular gain. */
11900 void
11901 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
11902 rtx (*reduc) (rtx, rtx, rtx))
11904 enum machine_mode inner = GET_MODE_INNER (mode);
11905 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
11906 rtx tmpsum = op1;
11908 for (i = parts / 2; i >= 1; i /= 2)
11910 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11911 emit_insn (reduc (dest, tmpsum, tmpsum));
11912 tmpsum = dest;
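/* Illustration (register names hypothetical): reducing a V4HImode
   vector with a vpadd-style REDUC callback emits two pairwise adds,
   roughly

     vpadd.i16  d1, d0, d0   @ i == 2: {a0+a1, a2+a3, a0+a1, a2+a3}
     vpadd.i16  d2, d1, d1   @ i == 1: every lane holds a0+a1+a2+a3

   so, as noted above, each lane of the result holds the full reduction
   and the caller only reads lane 0.  */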
11916 /* If VALS is a vector constant that can be loaded into a register
11917 using VDUP, generate instructions to do so and return an RTX to
11918 assign to the register. Otherwise return NULL_RTX. */
11920 static rtx
11921 neon_vdup_constant (rtx vals)
11923 enum machine_mode mode = GET_MODE (vals);
11924 enum machine_mode inner_mode = GET_MODE_INNER (mode);
11925 int n_elts = GET_MODE_NUNITS (mode);
11926 bool all_same = true;
11927 rtx x;
11928 int i;
11930 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
11931 return NULL_RTX;
11933 for (i = 0; i < n_elts; ++i)
11935 x = XVECEXP (vals, 0, i);
11936 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11937 all_same = false;
11940 if (!all_same)
11941 /* The elements are not all the same. We could handle repeating
11942 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11943 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11944 vdup.i16). */
11945 return NULL_RTX;
11947 /* We can load this constant by using VDUP and a constant in a
11948 single ARM register. This will be cheaper than a vector
11949 load. */
11951 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
11952 return gen_rtx_VEC_DUPLICATE (mode, x);
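/* Example (illustrative): for a V4SImode CONST_VECTOR whose four
   elements are all 0x12345678 -- not representable as a VMOV
   immediate -- the value 0x12345678 is first copied into a core
   register and the function returns (vec_duplicate:V4SI (reg:SI rN)),
   which the vector move patterns emit as a single "vdup.32 qM, rN"
   (register numbers arbitrary).  */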
11955 /* Generate code to load VALS, which is a PARALLEL containing only
11956 constants (for vec_init) or CONST_VECTOR, efficiently into a
11957 register. Returns an RTX to copy into the register, or NULL_RTX
11958 for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
11961 neon_make_constant (rtx vals)
11963 enum machine_mode mode = GET_MODE (vals);
11964 rtx target;
11965 rtx const_vec = NULL_RTX;
11966 int n_elts = GET_MODE_NUNITS (mode);
11967 int n_const = 0;
11968 int i;
11970 if (GET_CODE (vals) == CONST_VECTOR)
11971 const_vec = vals;
11972 else if (GET_CODE (vals) == PARALLEL)
11974 /* A CONST_VECTOR must contain only CONST_INTs and
11975 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11976 Only store valid constants in a CONST_VECTOR. */
11977 for (i = 0; i < n_elts; ++i)
11979 rtx x = XVECEXP (vals, 0, i);
11980 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11981 n_const++;
11983 if (n_const == n_elts)
11984 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11986 else
11987 gcc_unreachable ();
11989 if (const_vec != NULL
11990 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
11991 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11992 return const_vec;
11993 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
11994 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11995 pipeline cycle; creating the constant takes one or two ARM
11996 pipeline cycles. */
11997 return target;
11998 else if (const_vec != NULL_RTX)
11999 /* Load from constant pool. On Cortex-A8 this takes two cycles
12000 (for either double or quad vectors).  We cannot take advantage
12001 of single-cycle VLD1 because we need a PC-relative addressing
12002 mode. */
12003 return const_vec;
12004 else
12005 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12006 We cannot construct an initializer.  */
12007 return NULL_RTX;
12010 /* Initialize vector TARGET to VALS. */
12012 void
12013 neon_expand_vector_init (rtx target, rtx vals)
12015 enum machine_mode mode = GET_MODE (target);
12016 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12017 int n_elts = GET_MODE_NUNITS (mode);
12018 int n_var = 0, one_var = -1;
12019 bool all_same = true;
12020 rtx x, mem;
12021 int i;
12023 for (i = 0; i < n_elts; ++i)
12025 x = XVECEXP (vals, 0, i);
12026 if (!CONSTANT_P (x))
12027 ++n_var, one_var = i;
12029 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12030 all_same = false;
12033 if (n_var == 0)
12035 rtx constant = neon_make_constant (vals);
12036 if (constant != NULL_RTX)
12038 emit_move_insn (target, constant);
12039 return;
12043 /* Splat a single non-constant element if we can. */
12044 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12046 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12047 emit_insn (gen_rtx_SET (VOIDmode, target,
12048 gen_rtx_VEC_DUPLICATE (mode, x)));
12049 return;
12052 /* One field is non-constant. Load constant then overwrite varying
12053 field. This is more efficient than using the stack. */
12054 if (n_var == 1)
12056 rtx copy = copy_rtx (vals);
12057 rtx index = GEN_INT (one_var);
12059 /* Load constant part of vector, substitute neighboring value for
12060 varying element. */
12061 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12062 neon_expand_vector_init (target, copy);
12064 /* Insert variable. */
12065 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12066 switch (mode)
12068 case V8QImode:
12069 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12070 break;
12071 case V16QImode:
12072 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12073 break;
12074 case V4HImode:
12075 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12076 break;
12077 case V8HImode:
12078 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12079 break;
12080 case V2SImode:
12081 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12082 break;
12083 case V4SImode:
12084 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12085 break;
12086 case V2SFmode:
12087 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12088 break;
12089 case V4SFmode:
12090 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12091 break;
12092 case V2DImode:
12093 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12094 break;
12095 default:
12096 gcc_unreachable ();
12098 return;
12101 /* Construct the vector in memory one field at a time
12102 and load the whole vector. */
12103 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12104 for (i = 0; i < n_elts; i++)
12105 emit_move_insn (adjust_address_nv (mem, inner_mode,
12106 i * GET_MODE_SIZE (inner_mode)),
12107 XVECEXP (vals, 0, i));
12108 emit_move_insn (target, mem);
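/* Example (illustrative): initializing a V4SImode vector to
   {x, 1, 2, 3}, where only X is non-constant, takes the n_var == 1
   path above: the constant vector {1, 1, 2, 3} is materialized first
   (the varying lane borrows its neighbour's value), and X is then
   inserted into lane 0 via gen_neon_vset_lanev4si, avoiding the
   in-memory construction at the end of the function.  */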
12111 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12112 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12113 reported source locations are bogus. */
12115 static void
12116 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12117 const char *err)
12119 HOST_WIDE_INT lane;
12121 gcc_assert (CONST_INT_P (operand));
12123 lane = INTVAL (operand);
12125 if (lane < low || lane >= high)
12126 error (err);
12129 /* Bounds-check lanes. */
12131 void
12132 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12134 bounds_check (operand, low, high, "lane out of range");
12137 /* Bounds-check constants. */
12139 void
12140 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12142 bounds_check (operand, low, high, "constant out of range");
12145 HOST_WIDE_INT
12146 neon_element_bits (enum machine_mode mode)
12148 if (mode == DImode)
12149 return GET_MODE_BITSIZE (mode);
12150 else
12151 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12155 /* Predicates for `match_operand' and `match_operator'. */
12157 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12158 WB is true if full writeback address modes are allowed and is false
12159 if limited writeback address modes (POST_INC and PRE_DEC) are
12160 allowed. */
12163 arm_coproc_mem_operand (rtx op, bool wb)
12165 rtx ind;
12167 /* Reject eliminable registers. */
12168 if (! (reload_in_progress || reload_completed)
12169 && ( reg_mentioned_p (frame_pointer_rtx, op)
12170 || reg_mentioned_p (arg_pointer_rtx, op)
12171 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12172 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12173 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12174 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12175 return FALSE;
12177 /* Constants are converted into offsets from labels. */
12178 if (!MEM_P (op))
12179 return FALSE;
12181 ind = XEXP (op, 0);
12183 if (reload_completed
12184 && (GET_CODE (ind) == LABEL_REF
12185 || (GET_CODE (ind) == CONST
12186 && GET_CODE (XEXP (ind, 0)) == PLUS
12187 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12188 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12189 return TRUE;
12191 /* Match: (mem (reg)). */
12192 if (REG_P (ind))
12193 return arm_address_register_rtx_p (ind, 0);
12195 Auto-increment addressing modes.  POST_INC and PRE_DEC are
12196 acceptable in any case (subject to verification by
12197 arm_address_register_rtx_p). We need WB to be true to accept
12198 PRE_INC and POST_DEC. */
12199 if (GET_CODE (ind) == POST_INC
12200 || GET_CODE (ind) == PRE_DEC
12201 || (wb
12202 && (GET_CODE (ind) == PRE_INC
12203 || GET_CODE (ind) == POST_DEC)))
12204 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12206 if (wb
12207 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12208 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12209 && GET_CODE (XEXP (ind, 1)) == PLUS
12210 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12211 ind = XEXP (ind, 1);
12213 /* Match:
12214 (plus (reg)
12215 (const)). */
12216 if (GET_CODE (ind) == PLUS
12217 && REG_P (XEXP (ind, 0))
12218 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12219 && CONST_INT_P (XEXP (ind, 1))
12220 && INTVAL (XEXP (ind, 1)) > -1024
12221 && INTVAL (XEXP (ind, 1)) < 1024
12222 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12223 return TRUE;
12225 return FALSE;
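/* Examples of addresses accepted above (illustrative): (mem (reg r4)),
   (mem (post_inc (reg r4))), and (mem (plus (reg r4) (const_int 8))),
   the last because 8 is a multiple of 4 inside (-1024, 1024).  An
   offset of 1024, or one that is not word-aligned, is rejected, which
   matches the offset range of the coprocessor load/store forms.  */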
12228 /* Return TRUE if OP is a memory operand which we can load or store a vector
12229 to/from. TYPE is one of the following values:
12230 0 - Vector load/store (vldr)
12231 1 - Core registers (ldm)
12232 2 - Element/structure loads (vld1)
12235 neon_vector_mem_operand (rtx op, int type, bool strict)
12237 rtx ind;
12239 /* Reject eliminable registers. */
12240 if (! (reload_in_progress || reload_completed)
12241 && ( reg_mentioned_p (frame_pointer_rtx, op)
12242 || reg_mentioned_p (arg_pointer_rtx, op)
12243 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12244 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12245 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12246 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12247 return !strict;
12249 /* Constants are converted into offsets from labels. */
12250 if (!MEM_P (op))
12251 return FALSE;
12253 ind = XEXP (op, 0);
12255 if (reload_completed
12256 && (GET_CODE (ind) == LABEL_REF
12257 || (GET_CODE (ind) == CONST
12258 && GET_CODE (XEXP (ind, 0)) == PLUS
12259 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12260 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12261 return TRUE;
12263 /* Match: (mem (reg)). */
12264 if (REG_P (ind))
12265 return arm_address_register_rtx_p (ind, 0);
12267 /* Allow post-increment with Neon registers; type 0 also allows pre-decrement.  */
12268 if ((type != 1 && GET_CODE (ind) == POST_INC)
12269 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12270 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12272 /* FIXME: vld1 allows register post-modify. */
12274 /* Match:
12275 (plus (reg)
12276 (const)). */
12277 if (type == 0
12278 && GET_CODE (ind) == PLUS
12279 && REG_P (XEXP (ind, 0))
12280 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12281 && CONST_INT_P (XEXP (ind, 1))
12282 && INTVAL (XEXP (ind, 1)) > -1024
12283 /* For quad modes, we restrict the constant offset to be slightly less
12284 than what the instruction format permits. We have no such constraint
12285 on double mode offsets. (This must match arm_legitimate_index_p.) */
12286 && (INTVAL (XEXP (ind, 1))
12287 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12288 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12289 return TRUE;
12291 return FALSE;
12294 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12295 type. */
12297 neon_struct_mem_operand (rtx op)
12299 rtx ind;
12301 /* Reject eliminable registers. */
12302 if (! (reload_in_progress || reload_completed)
12303 && ( reg_mentioned_p (frame_pointer_rtx, op)
12304 || reg_mentioned_p (arg_pointer_rtx, op)
12305 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12306 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12307 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12308 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12309 return FALSE;
12311 /* Constants are converted into offsets from labels. */
12312 if (!MEM_P (op))
12313 return FALSE;
12315 ind = XEXP (op, 0);
12317 if (reload_completed
12318 && (GET_CODE (ind) == LABEL_REF
12319 || (GET_CODE (ind) == CONST
12320 && GET_CODE (XEXP (ind, 0)) == PLUS
12321 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12322 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12323 return TRUE;
12325 /* Match: (mem (reg)). */
12326 if (REG_P (ind))
12327 return arm_address_register_rtx_p (ind, 0);
12329 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12330 if (GET_CODE (ind) == POST_INC
12331 || GET_CODE (ind) == PRE_DEC)
12332 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12334 return FALSE;
12337 /* Return true if X is a register that will be eliminated later on. */
12339 arm_eliminable_register (rtx x)
12341 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12342 || REGNO (x) == ARG_POINTER_REGNUM
12343 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12344 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12347 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12348 coprocessor registers; otherwise return NO_REGS.  */
12350 enum reg_class
12351 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12353 if (mode == HFmode)
12355 if (!TARGET_NEON_FP16)
12356 return GENERAL_REGS;
12357 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12358 return NO_REGS;
12359 return GENERAL_REGS;
12362 /* The neon move patterns handle all legitimate vector and struct
12363 addresses. */
12364 if (TARGET_NEON
12365 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12366 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12367 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12368 || VALID_NEON_STRUCT_MODE (mode)))
12369 return NO_REGS;
12371 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12372 return NO_REGS;
12374 return GENERAL_REGS;
12377 /* Values which must be returned in the most-significant end of the return
12378 register. */
12380 static bool
12381 arm_return_in_msb (const_tree valtype)
12383 return (TARGET_AAPCS_BASED
12384 && BYTES_BIG_ENDIAN
12385 && (AGGREGATE_TYPE_P (valtype)
12386 || TREE_CODE (valtype) == COMPLEX_TYPE
12387 || FIXED_POINT_TYPE_P (valtype)));
12390 /* Return TRUE if X references a SYMBOL_REF. */
12392 symbol_mentioned_p (rtx x)
12394 const char * fmt;
12395 int i;
12397 if (GET_CODE (x) == SYMBOL_REF)
12398 return 1;
12400 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12401 are constant offsets, not symbols. */
12402 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12403 return 0;
12405 fmt = GET_RTX_FORMAT (GET_CODE (x));
12407 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12409 if (fmt[i] == 'E')
12411 int j;
12413 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12414 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12415 return 1;
12417 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12418 return 1;
12421 return 0;
12424 /* Return TRUE if X references a LABEL_REF. */
12426 label_mentioned_p (rtx x)
12428 const char * fmt;
12429 int i;
12431 if (GET_CODE (x) == LABEL_REF)
12432 return 1;
12434 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12435 instruction, but they are constant offsets, not symbols. */
12436 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12437 return 0;
12439 fmt = GET_RTX_FORMAT (GET_CODE (x));
12440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12442 if (fmt[i] == 'E')
12444 int j;
12446 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12447 if (label_mentioned_p (XVECEXP (x, i, j)))
12448 return 1;
12450 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12451 return 1;
12454 return 0;
12458 tls_mentioned_p (rtx x)
12460 switch (GET_CODE (x))
12462 case CONST:
12463 return tls_mentioned_p (XEXP (x, 0));
12465 case UNSPEC:
12466 if (XINT (x, 1) == UNSPEC_TLS)
12467 return 1;
12469 default:
12470 return 0;
12474 /* Must not copy any rtx that uses a pc-relative address. */
12476 static int
12477 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12479 if (GET_CODE (*x) == UNSPEC
12480 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12481 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12482 return 1;
12483 return 0;
12486 static bool
12487 arm_cannot_copy_insn_p (rtx insn)
12489 /* The tls call insn cannot be copied, as it is paired with a data
12490 word. */
12491 if (recog_memoized (insn) == CODE_FOR_tlscall)
12492 return true;
12494 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12497 enum rtx_code
12498 minmax_code (rtx x)
12500 enum rtx_code code = GET_CODE (x);
12502 switch (code)
12504 case SMAX:
12505 return GE;
12506 case SMIN:
12507 return LE;
12508 case UMIN:
12509 return LEU;
12510 case UMAX:
12511 return GEU;
12512 default:
12513 gcc_unreachable ();
12517 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12519 bool
12520 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12521 int *mask, bool *signed_sat)
12523 /* The high bound must be a power of two minus one. */
12524 int log = exact_log2 (INTVAL (hi_bound) + 1);
12525 if (log == -1)
12526 return false;
12528 /* The low bound is either zero (for usat) or one less than the
12529 negation of the high bound (for ssat). */
12530 if (INTVAL (lo_bound) == 0)
12532 if (mask)
12533 *mask = log;
12534 if (signed_sat)
12535 *signed_sat = false;
12537 return true;
12540 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12542 if (mask)
12543 *mask = log + 1;
12544 if (signed_sat)
12545 *signed_sat = true;
12547 return true;
12550 return false;
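/* Worked example (illustrative): for SMIN (SMAX (x, 0), 255) the bounds
   are LO_BOUND = 0 and HI_BOUND = 255, so log == 8, *MASK is 8 and
   *SIGNED_SAT is false -- i.e. "usat rD, #8, rN", saturating to
   [0, 255].  For SMIN (SMAX (x, -256), 255), LO_BOUND == -HI_BOUND - 1,
   so *MASK is 9 and *SIGNED_SAT is true -- "ssat rD, #9, rN",
   saturating to [-256, 255].  Register names are arbitrary.  */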
12553 /* Return 1 if memory locations are adjacent. */
12555 adjacent_mem_locations (rtx a, rtx b)
12557 /* We don't guarantee to preserve the order of these memory refs. */
12558 if (volatile_refs_p (a) || volatile_refs_p (b))
12559 return 0;
12561 if ((REG_P (XEXP (a, 0))
12562 || (GET_CODE (XEXP (a, 0)) == PLUS
12563 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12564 && (REG_P (XEXP (b, 0))
12565 || (GET_CODE (XEXP (b, 0)) == PLUS
12566 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12568 HOST_WIDE_INT val0 = 0, val1 = 0;
12569 rtx reg0, reg1;
12570 int val_diff;
12572 if (GET_CODE (XEXP (a, 0)) == PLUS)
12574 reg0 = XEXP (XEXP (a, 0), 0);
12575 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12577 else
12578 reg0 = XEXP (a, 0);
12580 if (GET_CODE (XEXP (b, 0)) == PLUS)
12582 reg1 = XEXP (XEXP (b, 0), 0);
12583 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12585 else
12586 reg1 = XEXP (b, 0);
12588 /* Don't accept any offset that will require multiple
12589 instructions to handle, since this would cause the
12590 arith_adjacentmem pattern to output an overlong sequence. */
12591 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12592 return 0;
12594 /* Don't allow an eliminable register: register elimination can make
12595 the offset too large. */
12596 if (arm_eliminable_register (reg0))
12597 return 0;
12599 val_diff = val1 - val0;
12601 if (arm_ld_sched)
12603 /* If the target has load delay slots, then there's no benefit
12604 to using an ldm instruction unless the offset is zero and
12605 we are optimizing for size. */
12606 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12607 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12608 && (val_diff == 4 || val_diff == -4));
12611 return ((REGNO (reg0) == REGNO (reg1))
12612 && (val_diff == 4 || val_diff == -4));
12615 return 0;
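/* Example (illustrative): (mem:SI (reg r3)) and
   (mem:SI (plus (reg r3) (const_int 4))) are adjacent -- same base
   register, offsets 0 and 4.  On cores with load delay slots
   (arm_ld_sched) the pair is only reported adjacent when optimizing
   for size, per the comment above about ldm being no faster there.  */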
12618 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12619 for load operations, false for store operations. CONSECUTIVE is true
12620 if the register numbers in the operation must be consecutive in the register
12621 bank.  RETURN_PC is true if the value is to be loaded into the PC.
12622 The pattern we are trying to match for load is:
12623 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12624 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12627 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12629 where
12630 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12631 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12632 3. If consecutive is TRUE, then for kth register being loaded,
12633 REGNO (R_dk) = REGNO (R_d0) + k.
12634 The pattern for store is similar. */
12635 bool
12636 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12637 bool consecutive, bool return_pc)
12639 HOST_WIDE_INT count = XVECLEN (op, 0);
12640 rtx reg, mem, addr;
12641 unsigned regno;
12642 unsigned first_regno;
12643 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12644 rtx elt;
12645 bool addr_reg_in_reglist = false;
12646 bool update = false;
12647 int reg_increment;
12648 int offset_adj;
12649 int regs_per_val;
12651 /* If not in SImode, then registers must be consecutive
12652 (e.g., VLDM instructions for DFmode). */
12653 gcc_assert ((mode == SImode) || consecutive);
12654 /* Setting return_pc for stores is illegal. */
12655 gcc_assert (!return_pc || load);
12657 /* Set up the increments and the regs per val based on the mode. */
12658 reg_increment = GET_MODE_SIZE (mode);
12659 regs_per_val = reg_increment / 4;
12660 offset_adj = return_pc ? 1 : 0;
12662 if (count <= 1
12663 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12664 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12665 return false;
12667 /* Check if this is a write-back. */
12668 elt = XVECEXP (op, 0, offset_adj);
12669 if (GET_CODE (SET_SRC (elt)) == PLUS)
12671 i++;
12672 base = 1;
12673 update = true;
12675 /* The offset adjustment must be the number of registers being
12676 popped times the size of a single register. */
12677 if (!REG_P (SET_DEST (elt))
12678 || !REG_P (XEXP (SET_SRC (elt), 0))
12679 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12680 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12681 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12682 ((count - 1 - offset_adj) * reg_increment))
12683 return false;
12686 i = i + offset_adj;
12687 base = base + offset_adj;
12688 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12689 success depends on the type: VLDM can do just one reg,
12690 LDM must do at least two. */
12691 if ((count <= i) && (mode == SImode))
12692 return false;
12694 elt = XVECEXP (op, 0, i - 1);
12695 if (GET_CODE (elt) != SET)
12696 return false;
12698 if (load)
12700 reg = SET_DEST (elt);
12701 mem = SET_SRC (elt);
12703 else
12705 reg = SET_SRC (elt);
12706 mem = SET_DEST (elt);
12709 if (!REG_P (reg) || !MEM_P (mem))
12710 return false;
12712 regno = REGNO (reg);
12713 first_regno = regno;
12714 addr = XEXP (mem, 0);
12715 if (GET_CODE (addr) == PLUS)
12717 if (!CONST_INT_P (XEXP (addr, 1)))
12718 return false;
12720 offset = INTVAL (XEXP (addr, 1));
12721 addr = XEXP (addr, 0);
12724 if (!REG_P (addr))
12725 return false;
12727 /* Don't allow SP to be loaded unless it is also the base register. It
12728 guarantees that SP is reset correctly when an LDM instruction
12729 is interrupted. Otherwise, we might end up with a corrupt stack. */
12730 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12731 return false;
12733 for (; i < count; i++)
12735 elt = XVECEXP (op, 0, i);
12736 if (GET_CODE (elt) != SET)
12737 return false;
12739 if (load)
12741 reg = SET_DEST (elt);
12742 mem = SET_SRC (elt);
12744 else
12746 reg = SET_SRC (elt);
12747 mem = SET_DEST (elt);
12750 if (!REG_P (reg)
12751 || GET_MODE (reg) != mode
12752 || REGNO (reg) <= regno
12753 || (consecutive
12754 && (REGNO (reg) !=
12755 (unsigned int) (first_regno + regs_per_val * (i - base))))
12756 /* Don't allow SP to be loaded unless it is also the base register. It
12757 guarantees that SP is reset correctly when an LDM instruction
12758 is interrupted. Otherwise, we might end up with a corrupt stack. */
12759 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12760 || !MEM_P (mem)
12761 || GET_MODE (mem) != mode
12762 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12763 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12764 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12765 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12766 offset + (i - base) * reg_increment))
12767 && (!REG_P (XEXP (mem, 0))
12768 || offset + (i - base) * reg_increment != 0)))
12769 return false;
12771 regno = REGNO (reg);
12772 if (regno == REGNO (addr))
12773 addr_reg_in_reglist = true;
12776 if (load)
12778 if (update && addr_reg_in_reglist)
12779 return false;
12781 /* For Thumb-1, the address register is always modified - either by write-back
12782 or by explicit load. If the pattern does not describe an update,
12783 then the address register must be in the list of loaded registers. */
12784 if (TARGET_THUMB1)
12785 return update || addr_reg_in_reglist;
12788 return true;
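/* Example of a pattern accepted above (illustrative): a two-register
   SImode load multiple with no write-back,

     (parallel
       [(set (reg:SI 4) (mem:SI (reg:SI 0)))
        (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   The register numbers ascend (r4 < r5) and each memory offset grows
   by the 4-byte reg_increment, so all of the checks succeed.  */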
12791 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12792 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12793 instruction. ADD_OFFSET is nonzero if the base address register needs
12794 to be modified with an add instruction before we can use it. */
12796 static bool
12797 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12798 int nops, HOST_WIDE_INT add_offset)
12800 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12801 if the offset isn't small enough. The reason 2 ldrs are faster
12802 is because these ARMs are able to do more than one cache access
12803 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12804 whilst the ARM8 has a double bandwidth cache. This means that
12805 these cores can do both an instruction fetch and a data fetch in
12806 a single cycle, so the trick of calculating the address into a
12807 scratch register (one of the result regs) and then doing a load
12808 multiple actually becomes slower (and no smaller in code size).
12809 That is the transformation
12811 ldr rd1, [rbase + offset]
12812 ldr rd2, [rbase + offset + 4]
12814 to
12816 add rd1, rbase, offset
12817 ldmia rd1, {rd1, rd2}
12819 produces worse code -- '3 cycles + any stalls on rd2' instead of
12820 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12821 access per cycle, the first sequence could never complete in less
12822 than 6 cycles, whereas the ldm sequence would only take 5 and
12823 would make better use of sequential accesses if not hitting the
12824 cache.
12826 We cheat here and test 'arm_ld_sched' which we currently know to
12827 only be true for the ARM8, ARM9 and StrongARM. If this ever
12828 changes, then the test below needs to be reworked. */
12829 if (nops == 2 && arm_ld_sched && add_offset != 0)
12830 return false;
12832 /* XScale has load-store double instructions, but they have stricter
12833 alignment requirements than load-store multiple, so we cannot
12834 use them.
12836 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12837 the pipeline until completion.
12839 NREGS CYCLES
12840 1 3
12841 2 4
12842 3 5
12843 4 6
12845 An ldr instruction takes 1-3 cycles, but does not block the
12846 pipeline.
12848 NREGS CYCLES
12849 1 1-3
12850 2 2-6
12851 3 3-9
12852 4 4-12
12854 Best case ldr will always win. However, the more ldr instructions
12855 we issue, the less likely we are to be able to schedule them well.
12856 Using ldr instructions also increases code size.
12858 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12859 for counts of 3 or 4 regs. */
12860 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12861 return false;
12862 return true;
12865 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12866 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12867 an array ORDER which describes the sequence to use when accessing the
12868 offsets that produces an ascending order. In this sequence, each
12869 offset must be larger by exactly 4 than the previous one. ORDER[0]
12870 must have been filled in with the lowest offset by the caller.
12871 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12872 we use to verify that ORDER produces an ascending order of registers.
12873 Return true if it was possible to construct such an order, false if
12874 not. */
12876 static bool
12877 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12878 int *unsorted_regs)
12880 int i;
12881 for (i = 1; i < nops; i++)
12883 int j;
12885 order[i] = order[i - 1];
12886 for (j = 0; j < nops; j++)
12887 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12889 /* We must find exactly one offset that is higher than the
12890 previous one by 4. */
12891 if (order[i] != order[i - 1])
12892 return false;
12893 order[i] = j;
12895 if (order[i] == order[i - 1])
12896 return false;
12897 /* The register numbers must be ascending. */
12898 if (unsorted_regs != NULL
12899 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
12900 return false;
12902 return true;
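/* Example (illustrative): with UNSORTED_OFFSETS = {8, 0, 4, 12} and
   ORDER[0] pre-set to 1 (the index of offset 0), the loop fills ORDER
   with {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12 in ascending order.  If
   any step cannot find exactly one offset that is larger than the
   previous one by 4, the function returns false.  */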
12905 /* Used to determine in a peephole whether a sequence of load
12906 instructions can be changed into a load-multiple instruction.
12907 NOPS is the number of separate load instructions we are examining. The
12908 first NOPS entries in OPERANDS are the destination registers, the
12909 next NOPS entries are memory operands. If this function is
12910 successful, *BASE is set to the common base register of the memory
12911 accesses; *LOAD_OFFSET is set to the first memory location's offset
12912 from that base register.
12913 REGS is an array filled in with the destination register numbers.
12914 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
12915 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
12916 the sequence of registers in REGS matches the loads from ascending memory
12917 locations, and the function verifies that the register numbers are
12918 themselves ascending. If CHECK_REGS is false, the register numbers
12919 are stored in the order they are found in the operands. */
12920 static int
12921 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
12922 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
12924 int unsorted_regs[MAX_LDM_STM_OPS];
12925 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
12926 int order[MAX_LDM_STM_OPS];
12927 rtx base_reg_rtx = NULL;
12928 int base_reg = -1;
12929 int i, ldm_case;
12931 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12932 easily extended if required. */
12933 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
12935 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
12937 /* Loop over the operands and check that the memory references are
12938 suitable (i.e. immediate offsets from the same base register). At
12939 the same time, extract the target register, and the memory
12940 offsets. */
12941 for (i = 0; i < nops; i++)
12943 rtx reg;
12944 rtx offset;
12946 /* Convert a subreg of a mem into the mem itself. */
12947 if (GET_CODE (operands[nops + i]) == SUBREG)
12948 operands[nops + i] = alter_subreg (operands + (nops + i), true);
12950 gcc_assert (MEM_P (operands[nops + i]));
12952 /* Don't reorder volatile memory references; it doesn't seem worth
12953 looking for the case where the order is ok anyway. */
12954 if (MEM_VOLATILE_P (operands[nops + i]))
12955 return 0;
12957 offset = const0_rtx;
12959 if ((REG_P (reg = XEXP (operands[nops + i], 0))
12960 || (GET_CODE (reg) == SUBREG
12961 && REG_P (reg = SUBREG_REG (reg))))
12962 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
12963 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
12964 || (GET_CODE (reg) == SUBREG
12965 && REG_P (reg = SUBREG_REG (reg))))
12966 && (CONST_INT_P (offset
12967 = XEXP (XEXP (operands[nops + i], 0), 1)))))
12969 if (i == 0)
12971 base_reg = REGNO (reg);
12972 base_reg_rtx = reg;
12973 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
12974 return 0;
12976 else if (base_reg != (int) REGNO (reg))
12977 /* Not addressed from the same base register. */
12978 return 0;
12980 unsorted_regs[i] = (REG_P (operands[i])
12981 ? REGNO (operands[i])
12982 : REGNO (SUBREG_REG (operands[i])));
12984 /* If it isn't an integer register, or if it overwrites the
12985 base register but isn't the last insn in the list, then
12986 we can't do this. */
12987 if (unsorted_regs[i] < 0
12988 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
12989 || unsorted_regs[i] > 14
12990 || (i != nops - 1 && unsorted_regs[i] == base_reg))
12991 return 0;
12993 /* Don't allow SP to be loaded unless it is also the base
12994 register. It guarantees that SP is reset correctly when
12995 an LDM instruction is interrupted. Otherwise, we might
12996 end up with a corrupt stack. */
12997 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
12998 return 0;
13000 unsorted_offsets[i] = INTVAL (offset);
13001 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13002 order[0] = i;
13004 else
13005 /* Not a suitable memory address. */
13006 return 0;
13009 /* All the useful information has now been extracted from the
13010 operands into unsorted_regs and unsorted_offsets; additionally,
13011 order[0] has been set to the lowest offset in the list. Sort
13012 the offsets into order, verifying that they are adjacent, and
13013 check that the register numbers are ascending. */
13014 if (!compute_offset_order (nops, unsorted_offsets, order,
13015 check_regs ? unsorted_regs : NULL))
13016 return 0;
13018 if (saved_order)
13019 memcpy (saved_order, order, sizeof order);
13021 if (base)
13023 *base = base_reg;
13025 for (i = 0; i < nops; i++)
13026 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13028 *load_offset = unsorted_offsets[order[0]];
13031 if (TARGET_THUMB1
13032 && !peep2_reg_dead_p (nops, base_reg_rtx))
13033 return 0;
13035 if (unsorted_offsets[order[0]] == 0)
13036 ldm_case = 1; /* ldmia */
13037 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13038 ldm_case = 2; /* ldmib */
13039 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13040 ldm_case = 3; /* ldmda */
13041 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13042 ldm_case = 4; /* ldmdb */
13043 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13044 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13045 ldm_case = 5;
13046 else
13047 return 0;
13049 if (!multiple_operation_profitable_p (false, nops,
13050 ldm_case == 5
13051 ? unsorted_offsets[order[0]] : 0))
13052 return 0;
13054 return ldm_case;
13057 /* Used to determine in a peephole whether a sequence of store instructions can
13058 be changed into a store-multiple instruction.
13059 NOPS is the number of separate store instructions we are examining.
13060 NOPS_TOTAL is the total number of instructions recognized by the peephole
13061 pattern.
13062 The first NOPS entries in OPERANDS are the source registers, the next
13063 NOPS entries are memory operands. If this function is successful, *BASE is
13064 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13065 to the first memory location's offset from that base register. REGS is an
13066 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13067 likewise filled with the corresponding rtx's.
13068 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13069 numbers to an ascending order of stores.
13070 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13071 from ascending memory locations, and the function verifies that the register
13072 numbers are themselves ascending. If CHECK_REGS is false, the register
13073 numbers are stored in the order they are found in the operands. */
13074 static int
13075 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13076 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13077 HOST_WIDE_INT *load_offset, bool check_regs)
13079 int unsorted_regs[MAX_LDM_STM_OPS];
13080 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13081 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13082 int order[MAX_LDM_STM_OPS];
13083 int base_reg = -1;
13084 rtx base_reg_rtx = NULL;
13085 int i, stm_case;
13087 /* Write back of base register is currently only supported for Thumb 1. */
13088 int base_writeback = TARGET_THUMB1;
13090 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13091 easily extended if required. */
13092 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13094 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13096 /* Loop over the operands and check that the memory references are
13097 suitable (i.e. immediate offsets from the same base register). At
13098 the same time, extract the target register, and the memory
13099 offsets. */
13100 for (i = 0; i < nops; i++)
13102 rtx reg;
13103 rtx offset;
13105 /* Convert a subreg of a mem into the mem itself. */
13106 if (GET_CODE (operands[nops + i]) == SUBREG)
13107 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13109 gcc_assert (MEM_P (operands[nops + i]));
13111 /* Don't reorder volatile memory references; it doesn't seem worth
13112 looking for the case where the order is ok anyway. */
13113 if (MEM_VOLATILE_P (operands[nops + i]))
13114 return 0;
13116 offset = const0_rtx;
13118 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13119 || (GET_CODE (reg) == SUBREG
13120 && REG_P (reg = SUBREG_REG (reg))))
13121 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13122 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13123 || (GET_CODE (reg) == SUBREG
13124 && REG_P (reg = SUBREG_REG (reg))))
13125 && (CONST_INT_P (offset
13126 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13128 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13129 ? operands[i] : SUBREG_REG (operands[i]));
13130 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13132 if (i == 0)
13134 base_reg = REGNO (reg);
13135 base_reg_rtx = reg;
13136 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13137 return 0;
13139 else if (base_reg != (int) REGNO (reg))
13140 /* Not addressed from the same base register. */
13141 return 0;
13143 /* If it isn't an integer register, then we can't do this. */
13144 if (unsorted_regs[i] < 0
13145 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13146 /* The effects are unpredictable if the base register is
13147 both updated and stored. */
13148 || (base_writeback && unsorted_regs[i] == base_reg)
13149 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13150 || unsorted_regs[i] > 14)
13151 return 0;
13153 unsorted_offsets[i] = INTVAL (offset);
13154 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13155 order[0] = i;
13157 else
13158 /* Not a suitable memory address. */
13159 return 0;
13162 /* All the useful information has now been extracted from the
13163 operands into unsorted_regs and unsorted_offsets; additionally,
13164 order[0] has been set to the lowest offset in the list. Sort
13165 the offsets into order, verifying that they are adjacent, and
13166 check that the register numbers are ascending. */
13167 if (!compute_offset_order (nops, unsorted_offsets, order,
13168 check_regs ? unsorted_regs : NULL))
13169 return 0;
13171 if (saved_order)
13172 memcpy (saved_order, order, sizeof order);
13174 if (base)
13176 *base = base_reg;
13178 for (i = 0; i < nops; i++)
13180 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13181 if (reg_rtxs)
13182 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13185 *load_offset = unsorted_offsets[order[0]];
13188 if (TARGET_THUMB1
13189 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13190 return 0;
13192 if (unsorted_offsets[order[0]] == 0)
13193 stm_case = 1; /* stmia */
13194 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13195 stm_case = 2; /* stmib */
13196 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13197 stm_case = 3; /* stmda */
13198 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13199 stm_case = 4; /* stmdb */
13200 else
13201 return 0;
13203 if (!multiple_operation_profitable_p (false, nops, 0))
13204 return 0;
13206 return stm_case;
13209 /* Routines for use in generating RTL. */
13211 /* Generate a load-multiple instruction. COUNT is the number of loads in
13212 the instruction; REGS and MEMS are arrays containing the operands.
13213 BASEREG is the base register to be used in addressing the memory operands.
13214 WBACK_OFFSET is nonzero if the instruction should update the base
13215 register. */
13217 static rtx
13218 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13219 HOST_WIDE_INT wback_offset)
13221 int i = 0, j;
13222 rtx result;
13224 if (!multiple_operation_profitable_p (false, count, 0))
13226 rtx seq;
13228 start_sequence ();
13230 for (i = 0; i < count; i++)
13231 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13233 if (wback_offset != 0)
13234 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13236 seq = get_insns ();
13237 end_sequence ();
13239 return seq;
13242 result = gen_rtx_PARALLEL (VOIDmode,
13243 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13244 if (wback_offset != 0)
13246 XVECEXP (result, 0, 0)
13247 = gen_rtx_SET (VOIDmode, basereg,
13248 plus_constant (Pmode, basereg, wback_offset));
13249 i = 1;
13250 count++;
13253 for (j = 0; i < count; i++, j++)
13254 XVECEXP (result, 0, i)
13255 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13257 return result;
13260 /* Generate a store-multiple instruction. COUNT is the number of stores in
13261 the instruction; REGS and MEMS are arrays containing the operands.
13262 BASEREG is the base register to be used in addressing the memory operands.
13263 WBACK_OFFSET is nonzero if the instruction should update the base
13264 register. */
13266 static rtx
13267 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13268 HOST_WIDE_INT wback_offset)
13270 int i = 0, j;
13271 rtx result;
13273 if (GET_CODE (basereg) == PLUS)
13274 basereg = XEXP (basereg, 0);
13276 if (!multiple_operation_profitable_p (false, count, 0))
13278 rtx seq;
13280 start_sequence ();
13282 for (i = 0; i < count; i++)
13283 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13285 if (wback_offset != 0)
13286 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13288 seq = get_insns ();
13289 end_sequence ();
13291 return seq;
13294 result = gen_rtx_PARALLEL (VOIDmode,
13295 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13296 if (wback_offset != 0)
13298 XVECEXP (result, 0, 0)
13299 = gen_rtx_SET (VOIDmode, basereg,
13300 plus_constant (Pmode, basereg, wback_offset));
13301 i = 1;
13302 count++;
13305 for (j = 0; i < count; i++, j++)
13306 XVECEXP (result, 0, i)
13307 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13309 return result;
13312 /* Generate either a load-multiple or a store-multiple instruction. This
13313 function can be used in situations where we can start with a single MEM
13314 rtx and adjust its address upwards.
13315 COUNT is the number of operations in the instruction, not counting a
13316 possible update of the base register. REGS is an array containing the
13317 register operands.
13318 BASEREG is the base register to be used in addressing the memory operands,
13319 which are constructed from BASEMEM.
13320 WRITE_BACK specifies whether the generated instruction should include an
13321 update of the base register.
13322 OFFSETP is used to pass an offset to and from this function; this offset
13323 is not used when constructing the address (instead BASEMEM should have an
13324 appropriate offset in its address), it is used only for setting
13325 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13327 static rtx
13328 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13329 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13331 rtx mems[MAX_LDM_STM_OPS];
13332 HOST_WIDE_INT offset = *offsetp;
13333 int i;
13335 gcc_assert (count <= MAX_LDM_STM_OPS);
13337 if (GET_CODE (basereg) == PLUS)
13338 basereg = XEXP (basereg, 0);
13340 for (i = 0; i < count; i++)
13342 rtx addr = plus_constant (Pmode, basereg, i * 4);
13343 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13344 offset += 4;
13347 if (write_back)
13348 *offsetp = offset;
13350 if (is_load)
13351 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13352 write_back ? 4 * count : 0);
13353 else
13354 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13355 write_back ? 4 * count : 0);
13359 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13360 rtx basemem, HOST_WIDE_INT *offsetp)
13362 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13363 offsetp);
13367 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13368 rtx basemem, HOST_WIDE_INT *offsetp)
13370 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13371 offsetp);
13374 /* Called from a peephole2 expander to turn a sequence of loads into an
13375 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13376 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13377 is true if we can reorder the registers because they are used commutatively
13378 subsequently.
13379 Returns true iff we could generate a new instruction. */
13381 bool
13382 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13384 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13385 rtx mems[MAX_LDM_STM_OPS];
13386 int i, j, base_reg;
13387 rtx base_reg_rtx;
13388 HOST_WIDE_INT offset;
13389 int write_back = FALSE;
13390 int ldm_case;
13391 rtx addr;
13393 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13394 &base_reg, &offset, !sort_regs);
13396 if (ldm_case == 0)
13397 return false;
13399 if (sort_regs)
13400 for (i = 0; i < nops - 1; i++)
13401 for (j = i + 1; j < nops; j++)
13402 if (regs[i] > regs[j])
13404 int t = regs[i];
13405 regs[i] = regs[j];
13406 regs[j] = t;
13408 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13410 if (TARGET_THUMB1)
13412 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13413 gcc_assert (ldm_case == 1 || ldm_case == 5);
13414 write_back = TRUE;
13417 if (ldm_case == 5)
13419 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13420 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13421 offset = 0;
13422 if (!TARGET_THUMB1)
13424 base_reg = regs[0];
13425 base_reg_rtx = newbase;
13429 for (i = 0; i < nops; i++)
13431 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13432 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13433 SImode, addr, 0);
13435 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13436 write_back ? offset + i * 4 : 0));
13437 return true;
13440 /* Called from a peephole2 expander to turn a sequence of stores into an
13441 STM instruction. OPERANDS are the operands found by the peephole matcher;
13442 NOPS indicates how many separate stores we are trying to combine.
13443 Returns true iff we could generate a new instruction. */
13445 bool
13446 gen_stm_seq (rtx *operands, int nops)
13448 int i;
13449 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13450 rtx mems[MAX_LDM_STM_OPS];
13451 int base_reg;
13452 rtx base_reg_rtx;
13453 HOST_WIDE_INT offset;
13454 int write_back = FALSE;
13455 int stm_case;
13456 rtx addr;
13457 bool base_reg_dies;
13459 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13460 mem_order, &base_reg, &offset, true);
13462 if (stm_case == 0)
13463 return false;
13465 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13467 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13468 if (TARGET_THUMB1)
13470 gcc_assert (base_reg_dies);
13471 write_back = TRUE;
13474 if (stm_case == 5)
13476 gcc_assert (base_reg_dies);
13477 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13478 offset = 0;
13481 addr = plus_constant (Pmode, base_reg_rtx, offset);
13483 for (i = 0; i < nops; i++)
13485 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13486 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13487 SImode, addr, 0);
13489 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13490 write_back ? offset + i * 4 : 0));
13491 return true;
13494 /* Called from a peephole2 expander to turn a sequence of stores that are
13495 preceded by constant loads into an STM instruction. OPERANDS are the
13496 operands found by the peephole matcher; NOPS indicates how many
13497 separate stores we are trying to combine; there are 2 * NOPS
13498 instructions in the peephole.
13499 Returns true iff we could generate a new instruction. */
13501 bool
13502 gen_const_stm_seq (rtx *operands, int nops)
13504 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13505 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13506 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13507 rtx mems[MAX_LDM_STM_OPS];
13508 int base_reg;
13509 rtx base_reg_rtx;
13510 HOST_WIDE_INT offset;
13511 int write_back = FALSE;
13512 int stm_case;
13513 rtx addr;
13514 bool base_reg_dies;
13515 int i, j;
13516 HARD_REG_SET allocated;
13518 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13519 mem_order, &base_reg, &offset, false);
13521 if (stm_case == 0)
13522 return false;
13524 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13526 /* If the same register is used more than once, try to find a free
13527 register. */
13528 CLEAR_HARD_REG_SET (allocated);
13529 for (i = 0; i < nops; i++)
13531 for (j = i + 1; j < nops; j++)
13532 if (regs[i] == regs[j])
13534 rtx t = peep2_find_free_register (0, nops * 2,
13535 TARGET_THUMB1 ? "l" : "r",
13536 SImode, &allocated);
13537 if (t == NULL_RTX)
13538 return false;
13539 reg_rtxs[i] = t;
13540 regs[i] = REGNO (t);
13544 /* Compute an ordering that maps the register numbers to an ascending
13545 sequence. */
13546 reg_order[0] = 0;
13547 for (i = 0; i < nops; i++)
13548 if (regs[i] < regs[reg_order[0]])
13549 reg_order[0] = i;
13551 for (i = 1; i < nops; i++)
13553 int this_order = reg_order[i - 1];
13554 for (j = 0; j < nops; j++)
13555 if (regs[j] > regs[reg_order[i - 1]]
13556 && (this_order == reg_order[i - 1]
13557 || regs[j] < regs[this_order]))
13558 this_order = j;
13559 reg_order[i] = this_order;
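/* For example, regs = {3, 1, 2} yields reg_order = {1, 2, 0}: the indices
   visited in order of increasing register number.  */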
13562 /* Ensure that registers that must be live after the instruction end
13563 up with the correct value. */
13564 for (i = 0; i < nops; i++)
13566 int this_order = reg_order[i];
13567 if ((this_order != mem_order[i]
13568 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13569 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13570 return false;
13573 /* Load the constants. */
13574 for (i = 0; i < nops; i++)
13576 rtx op = operands[2 * nops + mem_order[i]];
13577 sorted_regs[i] = regs[reg_order[i]];
13578 emit_move_insn (reg_rtxs[reg_order[i]], op);
13581 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13583 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13584 if (TARGET_THUMB1)
13586 gcc_assert (base_reg_dies);
13587 write_back = TRUE;
13590 if (stm_case == 5)
13592 gcc_assert (base_reg_dies);
13593 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13594 offset = 0;
13597 addr = plus_constant (Pmode, base_reg_rtx, offset);
13599 for (i = 0; i < nops; i++)
13601 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13602 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13603 SImode, addr, 0);
13605 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13606 write_back ? offset + i * 4 : 0));
13607 return true;
13610 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13611 unaligned copies on processors which support unaligned semantics for those
13612 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13613 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13614 An interleave factor of 1 (the minimum) will perform no interleaving.
13615 Load/store multiple are used for aligned addresses where possible. */
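/* As a rough illustration (not the exact RTL), a copy with
   INTERLEAVE_FACTOR == 2 from a word-aligned source to an unaligned
   destination performs, per 8-byte chunk, approximately:

       ldmia   src!, {r0, r1}
       str     r0, [dst]        @ unaligned store
       str     r1, [dst, #4]    @ unaligned store

   A copy where neither side is word aligned uses plain (unaligned)
   ldr/str for both the loads and the stores instead.  */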
13617 static void
13618 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13619 HOST_WIDE_INT length,
13620 unsigned int interleave_factor)
13622 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13623 int *regnos = XALLOCAVEC (int, interleave_factor);
13624 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13625 HOST_WIDE_INT i, j;
13626 HOST_WIDE_INT remaining = length, words;
13627 rtx halfword_tmp = NULL, byte_tmp = NULL;
13628 rtx dst, src;
13629 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13630 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13631 HOST_WIDE_INT srcoffset, dstoffset;
13632 HOST_WIDE_INT src_autoinc, dst_autoinc;
13633 rtx mem, addr;
13635 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13637 /* Use hard registers if we have aligned source or destination so we can use
13638 load/store multiple with contiguous registers. */
13639 if (dst_aligned || src_aligned)
13640 for (i = 0; i < interleave_factor; i++)
13641 regs[i] = gen_rtx_REG (SImode, i);
13642 else
13643 for (i = 0; i < interleave_factor; i++)
13644 regs[i] = gen_reg_rtx (SImode);
13646 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13647 src = copy_addr_to_reg (XEXP (srcbase, 0));
13649 srcoffset = dstoffset = 0;
13651 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13652 For copying the last bytes we want to subtract this offset again. */
13653 src_autoinc = dst_autoinc = 0;
13655 for (i = 0; i < interleave_factor; i++)
13656 regnos[i] = i;
13658 /* Copy BLOCK_SIZE_BYTES chunks. */
13660 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13662 /* Load words. */
13663 if (src_aligned && interleave_factor > 1)
13665 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13666 TRUE, srcbase, &srcoffset));
13667 src_autoinc += UNITS_PER_WORD * interleave_factor;
13669 else
13671 for (j = 0; j < interleave_factor; j++)
13673 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13674 - src_autoinc));
13675 mem = adjust_automodify_address (srcbase, SImode, addr,
13676 srcoffset + j * UNITS_PER_WORD);
13677 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13679 srcoffset += block_size_bytes;
13682 /* Store words. */
13683 if (dst_aligned && interleave_factor > 1)
13685 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13686 TRUE, dstbase, &dstoffset));
13687 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13689 else
13691 for (j = 0; j < interleave_factor; j++)
13693 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13694 - dst_autoinc));
13695 mem = adjust_automodify_address (dstbase, SImode, addr,
13696 dstoffset + j * UNITS_PER_WORD);
13697 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13699 dstoffset += block_size_bytes;
13702 remaining -= block_size_bytes;
13705 /* Copy any whole words left (note these aren't interleaved with any
13706 subsequent halfword/byte load/stores in the interests of simplicity). */
13708 words = remaining / UNITS_PER_WORD;
13710 gcc_assert (words < interleave_factor);
13712 if (src_aligned && words > 1)
13714 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13715 &srcoffset));
13716 src_autoinc += UNITS_PER_WORD * words;
13718 else
13720 for (j = 0; j < words; j++)
13722 addr = plus_constant (Pmode, src,
13723 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13724 mem = adjust_automodify_address (srcbase, SImode, addr,
13725 srcoffset + j * UNITS_PER_WORD);
13726 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13728 srcoffset += words * UNITS_PER_WORD;
13731 if (dst_aligned && words > 1)
13733 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13734 &dstoffset));
13735 dst_autoinc += words * UNITS_PER_WORD;
13737 else
13739 for (j = 0; j < words; j++)
13741 addr = plus_constant (Pmode, dst,
13742 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13743 mem = adjust_automodify_address (dstbase, SImode, addr,
13744 dstoffset + j * UNITS_PER_WORD);
13745 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13747 dstoffset += words * UNITS_PER_WORD;
13750 remaining -= words * UNITS_PER_WORD;
13752 gcc_assert (remaining < 4);
13754 /* Copy a halfword if necessary. */
13756 if (remaining >= 2)
13758 halfword_tmp = gen_reg_rtx (SImode);
13760 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13761 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13762 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13764 /* Either write out immediately, or delay until we've loaded the last
13765 byte, depending on interleave factor. */
13766 if (interleave_factor == 1)
13768 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13769 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13770 emit_insn (gen_unaligned_storehi (mem,
13771 gen_lowpart (HImode, halfword_tmp)));
13772 halfword_tmp = NULL;
13773 dstoffset += 2;
13776 remaining -= 2;
13777 srcoffset += 2;
13780 gcc_assert (remaining < 2);
13782 /* Copy last byte. */
13784 if ((remaining & 1) != 0)
13786 byte_tmp = gen_reg_rtx (SImode);
13788 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13789 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13790 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13792 if (interleave_factor == 1)
13794 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13795 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13796 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13797 byte_tmp = NULL;
13798 dstoffset++;
13801 remaining--;
13802 srcoffset++;
13805 /* Store last halfword if we haven't done so already. */
13807 if (halfword_tmp)
13809 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13810 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13811 emit_insn (gen_unaligned_storehi (mem,
13812 gen_lowpart (HImode, halfword_tmp)));
13813 dstoffset += 2;
13816 /* Likewise for last byte. */
13818 if (byte_tmp)
13820 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13821 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13822 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13823 dstoffset++;
13826 gcc_assert (remaining == 0 && srcoffset == dstoffset);
13829 /* From mips_adjust_block_mem:
13831 Helper function for doing a loop-based block operation on memory
13832 reference MEM. Each iteration of the loop will operate on LENGTH
13833 bytes of MEM.
13835 Create a new base register for use within the loop and point it to
13836 the start of MEM. Create a new memory reference that uses this
13837 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13839 static void
13840 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13841 rtx *loop_mem)
13843 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13845 /* Although the new mem does not refer to a known location,
13846 it does keep up to LENGTH bytes of alignment. */
13847 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13848 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13851 /* From mips_block_move_loop:
13853 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13854 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13855 the memory regions do not overlap. */
13857 static void
13858 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13859 unsigned int interleave_factor,
13860 HOST_WIDE_INT bytes_per_iter)
13862 rtx label, src_reg, dest_reg, final_src, test;
13863 HOST_WIDE_INT leftover;
13865 leftover = length % bytes_per_iter;
13866 length -= leftover;
13868 /* Create registers and memory references for use within the loop. */
13869 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13870 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13872 /* Calculate the value that SRC_REG should have after the last iteration of
13873 the loop. */
13874 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13875 0, 0, OPTAB_WIDEN);
13877 /* Emit the start of the loop. */
13878 label = gen_label_rtx ();
13879 emit_label (label);
13881 /* Emit the loop body. */
13882 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13883 interleave_factor);
13885 /* Move on to the next block. */
13886 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13887 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13889 /* Emit the loop condition. */
13890 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13891 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
13893 /* Mop up any left-over bytes. */
13894 if (leftover)
13895 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
13898 /* Emit a block move when either the source or destination is unaligned (not
13899 aligned to a four-byte boundary). This may need further tuning depending on
13900 core type, optimize_size setting, etc. */
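/* For example, when not optimizing for size, a 64-byte unaligned copy is
   emitted as a loop moving 16 bytes per iteration with a 4-way interleave,
   while a 24-byte copy is expanded straight-line.  */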
13902 static int
13903 arm_movmemqi_unaligned (rtx *operands)
13905 HOST_WIDE_INT length = INTVAL (operands[2]);
13907 if (optimize_size)
13909 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
13910 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
13911 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13912 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13913 or dst_aligned though: allow more interleaving in those cases since the
13914 resulting code can be smaller. */
13915 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
13916 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
13918 if (length > 12)
13919 arm_block_move_unaligned_loop (operands[0], operands[1], length,
13920 interleave_factor, bytes_per_iter);
13921 else
13922 arm_block_move_unaligned_straight (operands[0], operands[1], length,
13923 interleave_factor);
13925 else
13927 /* Note that the loop created by arm_block_move_unaligned_loop may be
13928 subject to loop unrolling, which makes tuning this condition a little
13929 redundant. */
13930 if (length > 32)
13931 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
13932 else
13933 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
13936 return 1;
13939 int
13940 arm_gen_movmemqi (rtx *operands)
13942 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
13943 HOST_WIDE_INT srcoffset, dstoffset;
13944 int i;
13945 rtx src, dst, srcbase, dstbase;
13946 rtx part_bytes_reg = NULL;
13947 rtx mem;
13949 if (!CONST_INT_P (operands[2])
13950 || !CONST_INT_P (operands[3])
13951 || INTVAL (operands[2]) > 64)
13952 return 0;
13954 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
13955 return arm_movmemqi_unaligned (operands);
13957 if (INTVAL (operands[3]) & 3)
13958 return 0;
13960 dstbase = operands[0];
13961 srcbase = operands[1];
13963 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
13964 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
13966 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
13967 out_words_to_go = INTVAL (operands[2]) / 4;
13968 last_bytes = INTVAL (operands[2]) & 3;
13969 dstoffset = srcoffset = 0;
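/* For example, a 15-byte copy gives in_words_to_go = 4, out_words_to_go = 3
   and last_bytes = 3: four words are loaded, three are stored whole, and
   the trailing three bytes are taken from the last word loaded.  */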
13971 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
13972 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
13974 for (i = 0; in_words_to_go >= 2; i+=4)
13976 if (in_words_to_go > 4)
13977 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
13978 TRUE, srcbase, &srcoffset));
13979 else
13980 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
13981 src, FALSE, srcbase,
13982 &srcoffset));
13984 if (out_words_to_go)
13986 if (out_words_to_go > 4)
13987 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
13988 TRUE, dstbase, &dstoffset));
13989 else if (out_words_to_go != 1)
13990 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
13991 out_words_to_go, dst,
13992 (last_bytes == 0
13993 ? FALSE : TRUE),
13994 dstbase, &dstoffset));
13995 else
13997 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
13998 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
13999 if (last_bytes != 0)
14001 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14002 dstoffset += 4;
14007 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14008 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14011 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14012 if (out_words_to_go)
14014 rtx sreg;
14016 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14017 sreg = copy_to_reg (mem);
14019 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14020 emit_move_insn (mem, sreg);
14021 in_words_to_go--;
14023 gcc_assert (!in_words_to_go); /* Sanity check */
14026 if (in_words_to_go)
14028 gcc_assert (in_words_to_go > 0);
14030 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14031 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14034 gcc_assert (!last_bytes || part_bytes_reg);
14036 if (BYTES_BIG_ENDIAN && last_bytes)
14038 rtx tmp = gen_reg_rtx (SImode);
14040 /* The bytes we want are in the top end of the word. */
14041 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14042 GEN_INT (8 * (4 - last_bytes))));
14043 part_bytes_reg = tmp;
14045 while (last_bytes)
14047 mem = adjust_automodify_address (dstbase, QImode,
14048 plus_constant (Pmode, dst,
14049 last_bytes - 1),
14050 dstoffset + last_bytes - 1);
14051 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14053 if (--last_bytes)
14055 tmp = gen_reg_rtx (SImode);
14056 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14057 part_bytes_reg = tmp;
14062 else
14064 if (last_bytes > 1)
14066 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14067 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14068 last_bytes -= 2;
14069 if (last_bytes)
14071 rtx tmp = gen_reg_rtx (SImode);
14072 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14073 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14074 part_bytes_reg = tmp;
14075 dstoffset += 2;
14079 if (last_bytes)
14081 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14082 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14086 return 1;
14089 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14090 by mode size. */
14091 inline static rtx
14092 next_consecutive_mem (rtx mem)
14094 enum machine_mode mode = GET_MODE (mem);
14095 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14096 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14098 return adjust_automodify_address (mem, mode, addr, offset);
14101 /* Copy using LDRD/STRD instructions whenever possible.
14102 Returns true upon success. */
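/* For example, a 14-byte copy between sufficiently aligned buffers is
   emitted, roughly, as one doubleword (LDRD/STRD) move, one word move
   and one halfword move.  */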
14103 bool
14104 gen_movmem_ldrd_strd (rtx *operands)
14106 unsigned HOST_WIDE_INT len;
14107 HOST_WIDE_INT align;
14108 rtx src, dst, base;
14109 rtx reg0;
14110 bool src_aligned, dst_aligned;
14111 bool src_volatile, dst_volatile;
14113 gcc_assert (CONST_INT_P (operands[2]));
14114 gcc_assert (CONST_INT_P (operands[3]));
14116 len = UINTVAL (operands[2]);
14117 if (len > 64)
14118 return false;
14120 /* Maximum alignment we can assume for both src and dst buffers. */
14121 align = INTVAL (operands[3]);
14123 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14124 return false;
14126 /* Place src and dst addresses in registers
14127 and update the corresponding mem rtx. */
14128 dst = operands[0];
14129 dst_volatile = MEM_VOLATILE_P (dst);
14130 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14131 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14132 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14134 src = operands[1];
14135 src_volatile = MEM_VOLATILE_P (src);
14136 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14137 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14138 src = adjust_automodify_address (src, VOIDmode, base, 0);
14140 if (!unaligned_access && !(src_aligned && dst_aligned))
14141 return false;
14143 if (src_volatile || dst_volatile)
14144 return false;
14146 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14147 if (!(dst_aligned || src_aligned))
14148 return arm_gen_movmemqi (operands);
14150 src = adjust_address (src, DImode, 0);
14151 dst = adjust_address (dst, DImode, 0);
14152 while (len >= 8)
14154 len -= 8;
14155 reg0 = gen_reg_rtx (DImode);
14156 if (src_aligned)
14157 emit_move_insn (reg0, src);
14158 else
14159 emit_insn (gen_unaligned_loaddi (reg0, src));
14161 if (dst_aligned)
14162 emit_move_insn (dst, reg0);
14163 else
14164 emit_insn (gen_unaligned_storedi (dst, reg0));
14166 src = next_consecutive_mem (src);
14167 dst = next_consecutive_mem (dst);
14170 gcc_assert (len < 8);
14171 if (len >= 4)
14173 /* More than a word but less than a double-word to copy. Copy a word. */
14174 reg0 = gen_reg_rtx (SImode);
14175 src = adjust_address (src, SImode, 0);
14176 dst = adjust_address (dst, SImode, 0);
14177 if (src_aligned)
14178 emit_move_insn (reg0, src);
14179 else
14180 emit_insn (gen_unaligned_loadsi (reg0, src));
14182 if (dst_aligned)
14183 emit_move_insn (dst, reg0);
14184 else
14185 emit_insn (gen_unaligned_storesi (dst, reg0));
14187 src = next_consecutive_mem (src);
14188 dst = next_consecutive_mem (dst);
14189 len -= 4;
14192 if (len == 0)
14193 return true;
14195 /* Copy the remaining bytes. */
14196 if (len >= 2)
14198 dst = adjust_address (dst, HImode, 0);
14199 src = adjust_address (src, HImode, 0);
14200 reg0 = gen_reg_rtx (SImode);
14201 if (src_aligned)
14202 emit_insn (gen_zero_extendhisi2 (reg0, src));
14203 else
14204 emit_insn (gen_unaligned_loadhiu (reg0, src));
14206 if (dst_aligned)
14207 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14208 else
14209 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14211 src = next_consecutive_mem (src);
14212 dst = next_consecutive_mem (dst);
14213 if (len == 2)
14214 return true;
14217 dst = adjust_address (dst, QImode, 0);
14218 src = adjust_address (src, QImode, 0);
14219 reg0 = gen_reg_rtx (QImode);
14220 emit_move_insn (reg0, src);
14221 emit_move_insn (dst, reg0);
14222 return true;
14225 /* Select a dominance comparison mode if possible for a test of the general
14226 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14227 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14228 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14229 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14230 In all cases OP will be either EQ or NE, but we don't need to know which
14231 here. If we are unable to support a dominance comparison we return
14232 CC mode. This will then fail to match for the RTL expressions that
14233 generate this call. */
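/* For example, when X and Y are both EQ comparisons and COND_OR is
   DOM_CC_X_OR_Y, this returns CC_DEQmode.  */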
14234 enum machine_mode
14235 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14237 enum rtx_code cond1, cond2;
14238 int swapped = 0;
14240 /* Currently we will probably get the wrong result if the individual
14241 comparisons are not simple. This also ensures that it is safe to
14242 reverse a comparison if necessary. */
14243 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14244 != CCmode)
14245 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14246 != CCmode))
14247 return CCmode;
14249 /* The if_then_else variant of this tests the second condition if the
14250 first passes, but is true if the first fails. Reverse the first
14251 condition to get a true "inclusive-or" expression. */
14252 if (cond_or == DOM_CC_NX_OR_Y)
14253 cond1 = reverse_condition (cond1);
14255 /* If the comparisons are not equal, and one doesn't dominate the other,
14256 then we can't do this. */
14257 if (cond1 != cond2
14258 && !comparison_dominates_p (cond1, cond2)
14259 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14260 return CCmode;
14262 if (swapped)
14264 enum rtx_code temp = cond1;
14265 cond1 = cond2;
14266 cond2 = temp;
14269 switch (cond1)
14271 case EQ:
14272 if (cond_or == DOM_CC_X_AND_Y)
14273 return CC_DEQmode;
14275 switch (cond2)
14277 case EQ: return CC_DEQmode;
14278 case LE: return CC_DLEmode;
14279 case LEU: return CC_DLEUmode;
14280 case GE: return CC_DGEmode;
14281 case GEU: return CC_DGEUmode;
14282 default: gcc_unreachable ();
14285 case LT:
14286 if (cond_or == DOM_CC_X_AND_Y)
14287 return CC_DLTmode;
14289 switch (cond2)
14291 case LT:
14292 return CC_DLTmode;
14293 case LE:
14294 return CC_DLEmode;
14295 case NE:
14296 return CC_DNEmode;
14297 default:
14298 gcc_unreachable ();
14301 case GT:
14302 if (cond_or == DOM_CC_X_AND_Y)
14303 return CC_DGTmode;
14305 switch (cond2)
14307 case GT:
14308 return CC_DGTmode;
14309 case GE:
14310 return CC_DGEmode;
14311 case NE:
14312 return CC_DNEmode;
14313 default:
14314 gcc_unreachable ();
14317 case LTU:
14318 if (cond_or == DOM_CC_X_AND_Y)
14319 return CC_DLTUmode;
14321 switch (cond2)
14323 case LTU:
14324 return CC_DLTUmode;
14325 case LEU:
14326 return CC_DLEUmode;
14327 case NE:
14328 return CC_DNEmode;
14329 default:
14330 gcc_unreachable ();
14333 case GTU:
14334 if (cond_or == DOM_CC_X_AND_Y)
14335 return CC_DGTUmode;
14337 switch (cond2)
14339 case GTU:
14340 return CC_DGTUmode;
14341 case GEU:
14342 return CC_DGEUmode;
14343 case NE:
14344 return CC_DNEmode;
14345 default:
14346 gcc_unreachable ();
14349 /* The remaining cases only occur when both comparisons are the
14350 same. */
14351 case NE:
14352 gcc_assert (cond1 == cond2);
14353 return CC_DNEmode;
14355 case LE:
14356 gcc_assert (cond1 == cond2);
14357 return CC_DLEmode;
14359 case GE:
14360 gcc_assert (cond1 == cond2);
14361 return CC_DGEmode;
14363 case LEU:
14364 gcc_assert (cond1 == cond2);
14365 return CC_DLEUmode;
14367 case GEU:
14368 gcc_assert (cond1 == cond2);
14369 return CC_DGEUmode;
14371 default:
14372 gcc_unreachable ();
14376 enum machine_mode
14377 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14379 /* All floating point compares return CCFP if it is an equality
14380 comparison, and CCFPE otherwise. */
14381 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14383 switch (op)
14385 case EQ:
14386 case NE:
14387 case UNORDERED:
14388 case ORDERED:
14389 case UNLT:
14390 case UNLE:
14391 case UNGT:
14392 case UNGE:
14393 case UNEQ:
14394 case LTGT:
14395 return CCFPmode;
14397 case LT:
14398 case LE:
14399 case GT:
14400 case GE:
14401 return CCFPEmode;
14403 default:
14404 gcc_unreachable ();
14408 /* A compare with a shifted operand. Because of canonicalization, the
14409 comparison will have to be swapped when we emit the assembler. */
14410 if (GET_MODE (y) == SImode
14411 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14412 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14413 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14414 || GET_CODE (x) == ROTATERT))
14415 return CC_SWPmode;
14417 /* This operation is performed swapped, but since we only rely on the Z
14418 flag we don't need an additional mode. */
14419 if (GET_MODE (y) == SImode
14420 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14421 && GET_CODE (x) == NEG
14422 && (op == EQ || op == NE))
14423 return CC_Zmode;
14425 /* This is a special case that is used by combine to allow a
14426 comparison of a shifted byte load to be split into a zero-extend
14427 followed by a comparison of the shifted integer (only valid for
14428 equalities and unsigned inequalities). */
14429 if (GET_MODE (x) == SImode
14430 && GET_CODE (x) == ASHIFT
14431 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14432 && GET_CODE (XEXP (x, 0)) == SUBREG
14433 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14434 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14435 && (op == EQ || op == NE
14436 || op == GEU || op == GTU || op == LTU || op == LEU)
14437 && CONST_INT_P (y))
14438 return CC_Zmode;
14440 /* A construct for a conditional compare: if the false arm contains
14441 0, then both conditions must be true; otherwise either condition
14442 must be true. Not all conditions are possible, so CCmode is
14443 returned if it can't be done. */
14444 if (GET_CODE (x) == IF_THEN_ELSE
14445 && (XEXP (x, 2) == const0_rtx
14446 || XEXP (x, 2) == const1_rtx)
14447 && COMPARISON_P (XEXP (x, 0))
14448 && COMPARISON_P (XEXP (x, 1)))
14449 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14450 INTVAL (XEXP (x, 2)));
14452 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14453 if (GET_CODE (x) == AND
14454 && (op == EQ || op == NE)
14455 && COMPARISON_P (XEXP (x, 0))
14456 && COMPARISON_P (XEXP (x, 1)))
14457 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14458 DOM_CC_X_AND_Y);
14460 if (GET_CODE (x) == IOR
14461 && (op == EQ || op == NE)
14462 && COMPARISON_P (XEXP (x, 0))
14463 && COMPARISON_P (XEXP (x, 1)))
14464 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14465 DOM_CC_X_OR_Y);
14467 /* An operation (on Thumb) where we want to test for a single bit.
14468 This is done by shifting that bit up into the top bit of a
14469 scratch register; we can then branch on the sign bit. */
14470 if (TARGET_THUMB1
14471 && GET_MODE (x) == SImode
14472 && (op == EQ || op == NE)
14473 && GET_CODE (x) == ZERO_EXTRACT
14474 && XEXP (x, 1) == const1_rtx)
14475 return CC_Nmode;
14477 /* For an operation that sets the condition codes as a side-effect, the
14478 V flag is not set correctly, so we can only use comparisons where
14479 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14480 instead.) */
14481 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14482 if (GET_MODE (x) == SImode
14483 && y == const0_rtx
14484 && (op == EQ || op == NE || op == LT || op == GE)
14485 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14486 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14487 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14488 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14489 || GET_CODE (x) == LSHIFTRT
14490 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14491 || GET_CODE (x) == ROTATERT
14492 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14493 return CC_NOOVmode;
14495 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14496 return CC_Zmode;
14498 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14499 && GET_CODE (x) == PLUS
14500 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14501 return CC_Cmode;
14503 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14505 switch (op)
14507 case EQ:
14508 case NE:
14509 /* A DImode comparison against zero can be implemented by
14510 or'ing the two halves together. */
14511 if (y == const0_rtx)
14512 return CC_Zmode;
14514 /* We can do an equality test in three Thumb instructions. */
14515 if (!TARGET_32BIT)
14516 return CC_Zmode;
14518 /* FALLTHROUGH */
14520 case LTU:
14521 case LEU:
14522 case GTU:
14523 case GEU:
14524 /* DImode unsigned comparisons can be implemented by cmp +
14525 cmpeq without a scratch register. Not worth doing in
14526 Thumb-2. */
14527 if (TARGET_32BIT)
14528 return CC_CZmode;
14530 /* FALLTHROUGH */
14532 case LT:
14533 case LE:
14534 case GT:
14535 case GE:
14536 /* DImode signed and unsigned comparisons can be implemented
14537 by cmp + sbcs with a scratch register, but that does not
14538 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14539 gcc_assert (op != EQ && op != NE);
14540 return CC_NCVmode;
14542 default:
14543 gcc_unreachable ();
14547 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14548 return GET_MODE (x);
14550 return CCmode;
14553 /* X and Y are two things to compare using CODE. Emit the compare insn and
14554 return the rtx for register 0 in the proper mode. FP means this is a
14555 floating point compare: I don't think that it is needed on the arm. */
14556 rtx
14557 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14559 enum machine_mode mode;
14560 rtx cc_reg;
14561 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14563 /* We might have X as a constant, Y as a register because of the predicates
14564 used for cmpdi. If so, force X to a register here. */
14565 if (dimode_comparison && !REG_P (x))
14566 x = force_reg (DImode, x);
14568 mode = SELECT_CC_MODE (code, x, y);
14569 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14571 if (dimode_comparison
14572 && mode != CC_CZmode)
14574 rtx clobber, set;
14576 /* To compare two non-zero values for equality, XOR them and
14577 then compare against zero. Not used for ARM mode; there
14578 CC_CZmode is cheaper. */
14579 if (mode == CC_Zmode && y != const0_rtx)
14581 gcc_assert (!reload_completed);
14582 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14583 y = const0_rtx;
14586 /* A scratch register is required. */
14587 if (reload_completed)
14588 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14589 else
14590 scratch = gen_rtx_SCRATCH (SImode);
14592 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14593 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14594 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14596 else
14597 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14599 return cc_reg;
14602 /* Generate a sequence of insns that will generate the correct return
14603 address mask depending on the physical architecture that the program
14604 is running on. */
14605 rtx
14606 arm_gen_return_addr_mask (void)
14608 rtx reg = gen_reg_rtx (Pmode);
14610 emit_insn (gen_return_addr_mask (reg));
14611 return reg;
14614 void
14615 arm_reload_in_hi (rtx *operands)
14617 rtx ref = operands[1];
14618 rtx base, scratch;
14619 HOST_WIDE_INT offset = 0;
14621 if (GET_CODE (ref) == SUBREG)
14623 offset = SUBREG_BYTE (ref);
14624 ref = SUBREG_REG (ref);
14627 if (REG_P (ref))
14629 /* We have a pseudo which has been spilt onto the stack; there
14630 are two cases here: the first where there is a simple
14631 stack-slot replacement and a second where the stack-slot is
14632 out of range, or is used as a subreg. */
14633 if (reg_equiv_mem (REGNO (ref)))
14635 ref = reg_equiv_mem (REGNO (ref));
14636 base = find_replacement (&XEXP (ref, 0));
14638 else
14639 /* The slot is out of range, or was dressed up in a SUBREG. */
14640 base = reg_equiv_address (REGNO (ref));
14642 else
14643 base = find_replacement (&XEXP (ref, 0));
14645 /* Handle the case where the address is too complex to be offset by 1. */
14646 if (GET_CODE (base) == MINUS
14647 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14649 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14651 emit_set_insn (base_plus, base);
14652 base = base_plus;
14654 else if (GET_CODE (base) == PLUS)
14656 /* The addend must be CONST_INT, or we would have dealt with it above. */
14657 HOST_WIDE_INT hi, lo;
14659 offset += INTVAL (XEXP (base, 1));
14660 base = XEXP (base, 0);
14662 /* Rework the address into a legal sequence of insns. */
14663 /* Valid range for lo is -4095 -> 4095 */
14664 lo = (offset >= 0
14665 ? (offset & 0xfff)
14666 : -((-offset) & 0xfff));
14668 /* Corner case, if lo is the max offset then we would be out of range
14669 once we have added the additional 1 below, so bump the msb into the
14670 pre-loading insn(s). */
14671 if (lo == 4095)
14672 lo &= 0x7ff;
14674 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14675 ^ (HOST_WIDE_INT) 0x80000000)
14676 - (HOST_WIDE_INT) 0x80000000);
14678 gcc_assert (hi + lo == offset);
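/* For example, offset 0x1234 splits into lo = 0x234 and hi = 0x1000,
   while offset -10 gives lo = -10 and hi = 0.  */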
14680 if (hi != 0)
14682 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14684 /* Get the base address; addsi3 knows how to handle constants
14685 that require more than one insn. */
14686 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14687 base = base_plus;
14688 offset = lo;
14692 /* Operands[2] may overlap operands[0] (though it won't overlap
14693 operands[1]), that's why we asked for a DImode reg -- so we can
14694 use the bit that does not overlap. */
14695 if (REGNO (operands[2]) == REGNO (operands[0]))
14696 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14697 else
14698 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14700 emit_insn (gen_zero_extendqisi2 (scratch,
14701 gen_rtx_MEM (QImode,
14702 plus_constant (Pmode, base,
14703 offset))));
14704 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14705 gen_rtx_MEM (QImode,
14706 plus_constant (Pmode, base,
14707 offset + 1))));
14708 if (!BYTES_BIG_ENDIAN)
14709 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14710 gen_rtx_IOR (SImode,
14711 gen_rtx_ASHIFT
14712 (SImode,
14713 gen_rtx_SUBREG (SImode, operands[0], 0),
14714 GEN_INT (8)),
14715 scratch));
14716 else
14717 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14718 gen_rtx_IOR (SImode,
14719 gen_rtx_ASHIFT (SImode, scratch,
14720 GEN_INT (8)),
14721 gen_rtx_SUBREG (SImode, operands[0], 0)));
14724 /* Handle storing a half-word to memory during reload by synthesizing as two
14725 byte stores. Take care not to clobber the input values until after we
14726 have moved them somewhere safe. This code assumes that if the DImode
14727 scratch in operands[2] overlaps either the input value or output address
14728 in some way, then that value must die in this insn (we absolutely need
14729 two scratch registers for some corner cases). */
14730 void
14731 arm_reload_out_hi (rtx *operands)
14733 rtx ref = operands[0];
14734 rtx outval = operands[1];
14735 rtx base, scratch;
14736 HOST_WIDE_INT offset = 0;
14738 if (GET_CODE (ref) == SUBREG)
14740 offset = SUBREG_BYTE (ref);
14741 ref = SUBREG_REG (ref);
14744 if (REG_P (ref))
14746 /* We have a pseudo which has been spilt onto the stack; there
14747 are two cases here: the first where there is a simple
14748 stack-slot replacement and a second where the stack-slot is
14749 out of range, or is used as a subreg. */
14750 if (reg_equiv_mem (REGNO (ref)))
14752 ref = reg_equiv_mem (REGNO (ref));
14753 base = find_replacement (&XEXP (ref, 0));
14755 else
14756 /* The slot is out of range, or was dressed up in a SUBREG. */
14757 base = reg_equiv_address (REGNO (ref));
14759 else
14760 base = find_replacement (&XEXP (ref, 0));
14762 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14764 /* Handle the case where the address is too complex to be offset by 1. */
14765 if (GET_CODE (base) == MINUS
14766 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14768 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14770 /* Be careful not to destroy OUTVAL. */
14771 if (reg_overlap_mentioned_p (base_plus, outval))
14773 /* Updating base_plus might destroy outval, see if we can
14774 swap the scratch and base_plus. */
14775 if (!reg_overlap_mentioned_p (scratch, outval))
14777 rtx tmp = scratch;
14778 scratch = base_plus;
14779 base_plus = tmp;
14781 else
14783 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14785 /* Be conservative and copy OUTVAL into the scratch now,
14786 this should only be necessary if outval is a subreg
14787 of something larger than a word. */
14788 /* XXX Might this clobber base? I can't see how it can,
14789 since scratch is known to overlap with OUTVAL, and
14790 must be wider than a word. */
14791 emit_insn (gen_movhi (scratch_hi, outval));
14792 outval = scratch_hi;
14796 emit_set_insn (base_plus, base);
14797 base = base_plus;
14799 else if (GET_CODE (base) == PLUS)
14801 /* The addend must be CONST_INT, or we would have dealt with it above. */
14802 HOST_WIDE_INT hi, lo;
14804 offset += INTVAL (XEXP (base, 1));
14805 base = XEXP (base, 0);
14807 /* Rework the address into a legal sequence of insns. */
14808 /* Valid range for lo is -4095 -> 4095 */
14809 lo = (offset >= 0
14810 ? (offset & 0xfff)
14811 : -((-offset) & 0xfff));
14813 /* Corner case, if lo is the max offset then we would be out of range
14814 once we have added the additional 1 below, so bump the msb into the
14815 pre-loading insn(s). */
14816 if (lo == 4095)
14817 lo &= 0x7ff;
14819 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14820 ^ (HOST_WIDE_INT) 0x80000000)
14821 - (HOST_WIDE_INT) 0x80000000);
14823 gcc_assert (hi + lo == offset);
14825 if (hi != 0)
14827 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14829 /* Be careful not to destroy OUTVAL. */
14830 if (reg_overlap_mentioned_p (base_plus, outval))
14832 /* Updating base_plus might destroy outval, see if we
14833 can swap the scratch and base_plus. */
14834 if (!reg_overlap_mentioned_p (scratch, outval))
14836 rtx tmp = scratch;
14837 scratch = base_plus;
14838 base_plus = tmp;
14840 else
14842 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14844 /* Be conservative and copy outval into scratch now,
14845 this should only be necessary if outval is a
14846 subreg of something larger than a word. */
14847 /* XXX Might this clobber base? I can't see how it
14848 can, since scratch is known to overlap with
14849 outval. */
14850 emit_insn (gen_movhi (scratch_hi, outval));
14851 outval = scratch_hi;
14855 /* Get the base address; addsi3 knows how to handle constants
14856 that require more than one insn. */
14857 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14858 base = base_plus;
14859 offset = lo;
14863 if (BYTES_BIG_ENDIAN)
14865 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14866 plus_constant (Pmode, base,
14867 offset + 1)),
14868 gen_lowpart (QImode, outval)));
14869 emit_insn (gen_lshrsi3 (scratch,
14870 gen_rtx_SUBREG (SImode, outval, 0),
14871 GEN_INT (8)));
14872 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14873 offset)),
14874 gen_lowpart (QImode, scratch)));
14876 else
14878 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
14879 offset)),
14880 gen_lowpart (QImode, outval)));
14881 emit_insn (gen_lshrsi3 (scratch,
14882 gen_rtx_SUBREG (SImode, outval, 0),
14883 GEN_INT (8)));
14884 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
14885 plus_constant (Pmode, base,
14886 offset + 1)),
14887 gen_lowpart (QImode, scratch)));
14891 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14892 (padded to the size of a word) should be passed in a register. */
14894 static bool
14895 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
14897 if (TARGET_AAPCS_BASED)
14898 return must_pass_in_stack_var_size (mode, type);
14899 else
14900 return must_pass_in_stack_var_size_or_pad (mode, type);
14904 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14905 Return true if an argument passed on the stack should be padded upwards,
14906 i.e. if the least-significant byte has useful data.
14907 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14908 aggregate types are placed in the lowest memory address. */
14910 bool
14911 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
14913 if (!TARGET_AAPCS_BASED)
14914 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
14916 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
14917 return false;
14919 return true;
14923 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14924 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14925 register has useful data, and return the opposite if the most
14926 significant byte does. */
14928 bool
14929 arm_pad_reg_upward (enum machine_mode mode,
14930 tree type, int first ATTRIBUTE_UNUSED)
14932 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
14934 /* For AAPCS, small aggregates, small fixed-point types,
14935 and small complex types are always padded upwards. */
14936 if (type)
14938 if ((AGGREGATE_TYPE_P (type)
14939 || TREE_CODE (type) == COMPLEX_TYPE
14940 || FIXED_POINT_TYPE_P (type))
14941 && int_size_in_bytes (type) <= 4)
14942 return true;
14944 else
14946 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
14947 && GET_MODE_SIZE (mode) <= 4)
14948 return true;
14952 /* Otherwise, use default padding. */
14953 return !BYTES_BIG_ENDIAN;
14956 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14957 assuming that the address in the base register is word aligned. */
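/* In Thumb-2 this accepts word-aligned offsets in [-1020, 1020]; in ARM
   state any offset in [-255, 255] is accepted.  */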
14958 bool
14959 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
14961 HOST_WIDE_INT max_offset;
14963 /* Offset must be a multiple of 4 in Thumb mode. */
14964 if (TARGET_THUMB2 && ((offset & 3) != 0))
14965 return false;
14967 if (TARGET_THUMB2)
14968 max_offset = 1020;
14969 else if (TARGET_ARM)
14970 max_offset = 255;
14971 else
14972 return false;
14974 return ((offset <= max_offset) && (offset >= -max_offset));
14977 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14978 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14979 Assumes that the address in the base register RN is word aligned. Pattern
14980 guarantees that both memory accesses use the same base register,
14981 the offsets are constants within the range, and the gap between the offsets is 4.
14982 If reload is complete, check that the registers are legal. WBACK indicates whether
14983 address is updated. LOAD indicates whether memory access is load or store. */
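/* For example, in ARM state "ldrd r0, r1, [r2, #8]" passes these checks
   (even first register, consecutive pair), whereas "ldrd r1, r2, [r3]"
   does not.  */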
14984 bool
14985 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
14986 bool wback, bool load)
14988 unsigned int t, t2, n;
14990 if (!reload_completed)
14991 return true;
14993 if (!offset_ok_for_ldrd_strd (offset))
14994 return false;
14996 t = REGNO (rt);
14997 t2 = REGNO (rt2);
14998 n = REGNO (rn);
15000 if ((TARGET_THUMB2)
15001 && ((wback && (n == t || n == t2))
15002 || (t == SP_REGNUM)
15003 || (t == PC_REGNUM)
15004 || (t2 == SP_REGNUM)
15005 || (t2 == PC_REGNUM)
15006 || (!load && (n == PC_REGNUM))
15007 || (load && (t == t2))
15008 /* Triggers Cortex-M3 LDRD errata. */
15009 || (!wback && load && fix_cm3_ldrd && (n == t))))
15010 return false;
15012 if ((TARGET_ARM)
15013 && ((wback && (n == t || n == t2))
15014 || (t2 == PC_REGNUM)
15015 || (t % 2 != 0) /* First destination register is not even. */
15016 || (t2 != t + 1)
15017 /* PC can be used as base register (for offset addressing only),
15018 but it is deprecated. */
15019 || (n == PC_REGNUM)))
15020 return false;
15022 return true;
15025 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15026 operand ADDR is an immediate offset from the base register and is
15027 not volatile, in which case it sets BASE and OFFSET
15028 accordingly. */
15029 bool
15030 mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
15032 /* TODO: Handle more general memory operand patterns, such as
15033 PRE_DEC and PRE_INC. */
15035 /* Convert a subreg of mem into mem itself. */
15036 if (GET_CODE (addr) == SUBREG)
15037 addr = alter_subreg (&addr, true);
15039 gcc_assert (MEM_P (addr));
15041 /* Don't modify volatile memory accesses. */
15042 if (MEM_VOLATILE_P (addr))
15043 return false;
15045 *offset = const0_rtx;
15047 addr = XEXP (addr, 0);
15048 if (REG_P (addr))
15050 *base = addr;
15051 return true;
15053 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15055 *base = XEXP (addr, 0);
15056 *offset = XEXP (addr, 1);
15057 return (REG_P (*base) && CONST_INT_P (*offset));
15060 return false;
15063 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15065 /* Called from a peephole2 to replace two word-size accesses with a
15066 single LDRD/STRD instruction. Returns true iff we can generate a
15067 new instruction sequence. That is, both accesses use the same base
15068 register and the gap between constant offsets is 4. This function
15069 may reorder its operands to match ldrd/strd RTL templates.
15070 OPERANDS are the operands found by the peephole matcher;
15071 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15072 corresponding memory operands. LOAD indicates whether the access
15073 is a load or a store. CONST_STORE indicates a store of constant
15074 integer values held in OPERANDS[4,5] and assumes that the pattern
15075 is 4 insns long, for the purpose of checking dead registers.
15076 COMMUTE indicates that register operands may be reordered. */
15077 bool
15078 gen_operands_ldrd_strd (rtx *operands, bool load,
15079 bool const_store, bool commute)
15081 int nops = 2;
15082 HOST_WIDE_INT offsets[2], offset;
15083 rtx base = NULL_RTX;
15084 rtx cur_base, cur_offset, tmp;
15085 int i, gap;
15086 HARD_REG_SET regset;
15088 gcc_assert (!const_store || !load);
15089 /* Check that the memory references are immediate offsets from the
15090 same base register. Extract the base register, the destination
15091 registers, and the corresponding memory offsets. */
15092 for (i = 0; i < nops; i++)
15094 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15095 return false;
15097 if (i == 0)
15098 base = cur_base;
15099 else if (REGNO (base) != REGNO (cur_base))
15100 return false;
15102 offsets[i] = INTVAL (cur_offset);
15103 if (GET_CODE (operands[i]) == SUBREG)
15105 tmp = SUBREG_REG (operands[i]);
15106 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15107 operands[i] = tmp;
15111 /* Make sure there is no dependency between the individual loads. */
15112 if (load && REGNO (operands[0]) == REGNO (base))
15113 return false; /* RAW */
15115 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15116 return false; /* WAW */
15118 /* If the same input register is used in both stores
15119 when storing different constants, try to find a free register.
15120 For example, the code
15121 mov r0, 0
15122 str r0, [r2]
15123 mov r0, 1
15124 str r0, [r2, #4]
15125 can be transformed into
15126 mov r1, 0
15127 strd r1, r0, [r2]
15128 in Thumb mode assuming that r1 is free. */
15129 if (const_store
15130 && REGNO (operands[0]) == REGNO (operands[1])
15131 && INTVAL (operands[4]) != INTVAL (operands[5]))
15133 if (TARGET_THUMB2)
15135 CLEAR_HARD_REG_SET (regset);
15136 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15137 if (tmp == NULL_RTX)
15138 return false;
15140 /* Use the new register in the first load to ensure that
15141 if the original input register is not dead after the peephole,
15142 then it will have the correct constant value. */
15143 operands[0] = tmp;
15145 else if (TARGET_ARM)
15147 return false;
15148 int regno = REGNO (operands[0]);
15149 if (!peep2_reg_dead_p (4, operands[0]))
15151 /* When the input register is even and is not dead after the
15152 pattern, it has to hold the second constant but we cannot
15153 form a legal STRD in ARM mode with this register as the second
15154 register. */
15155 if (regno % 2 == 0)
15156 return false;
15158 /* Is regno-1 free? */
15159 SET_HARD_REG_SET (regset);
15160 CLEAR_HARD_REG_BIT(regset, regno - 1);
15161 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15162 if (tmp == NULL_RTX)
15163 return false;
15165 operands[0] = tmp;
15167 else
15169 /* Find a DImode register. */
15170 CLEAR_HARD_REG_SET (regset);
15171 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15172 if (tmp != NULL_RTX)
15174 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15175 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15177 else
15179 /* Can we use the input register to form a DI register? */
15180 SET_HARD_REG_SET (regset);
15181 CLEAR_HARD_REG_BIT(regset,
15182 regno % 2 == 0 ? regno + 1 : regno - 1);
15183 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15184 if (tmp == NULL_RTX)
15185 return false;
15186 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15190 gcc_assert (operands[0] != NULL_RTX);
15191 gcc_assert (operands[1] != NULL_RTX);
15192 gcc_assert (REGNO (operands[0]) % 2 == 0);
15193 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15197 /* Make sure the instructions are ordered with lower memory access first. */
15198 if (offsets[0] > offsets[1])
15200 gap = offsets[0] - offsets[1];
15201 offset = offsets[1];
15203 /* Swap the instructions such that lower memory is accessed first. */
15204 SWAP_RTX (operands[0], operands[1]);
15205 SWAP_RTX (operands[2], operands[3]);
15206 if (const_store)
15207 SWAP_RTX (operands[4], operands[5]);
15209 else
15211 gap = offsets[1] - offsets[0];
15212 offset = offsets[0];
15215 /* Make sure accesses are to consecutive memory locations. */
15216 if (gap != 4)
15217 return false;
15219 /* Make sure we generate legal instructions. */
15220 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15221 false, load))
15222 return true;
15224 /* In Thumb state, where registers are almost unconstrained, there
15225 is little hope of fixing it. */
15226 if (TARGET_THUMB2)
15227 return false;
15229 if (load && commute)
15231 /* Try reordering registers. */
15232 SWAP_RTX (operands[0], operands[1]);
15233 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15234 false, load))
15235 return true;
15238 if (const_store)
15240 /* If input registers are dead after this pattern, they can be
15241 reordered or replaced by other registers that are free in the
15242 current pattern. */
15243 if (!peep2_reg_dead_p (4, operands[0])
15244 || !peep2_reg_dead_p (4, operands[1]))
15245 return false;
15247 /* Try to reorder the input registers. */
15248 /* For example, the code
15249 mov r0, 0
15250 mov r1, 1
15251 str r1, [r2]
15252 str r0, [r2, #4]
15253 can be transformed into
15254 mov r1, 0
15255 mov r0, 1
15256 strd r0, r1, [r2]
15258 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15259 false, false))
15261 SWAP_RTX (operands[0], operands[1]);
15262 return true;
15265 /* Try to find a free DI register. */
15266 CLEAR_HARD_REG_SET (regset);
15267 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15268 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15269 while (true)
15271 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15272 if (tmp == NULL_RTX)
15273 return false;
15275 /* DREG must be an even-numbered register in DImode.
15276 Split it into SI registers. */
15277 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15278 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15279 gcc_assert (operands[0] != NULL_RTX);
15280 gcc_assert (operands[1] != NULL_RTX);
15281 gcc_assert (REGNO (operands[0]) % 2 == 0);
15282 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15284 return (operands_ok_ldrd_strd (operands[0], operands[1],
15285 base, offset,
15286 false, load));
15290 return false;
15292 #undef SWAP_RTX
15297 /* Print a symbolic form of X to the debug file, F. */
15298 static void
15299 arm_print_value (FILE *f, rtx x)
15301 switch (GET_CODE (x))
15303 case CONST_INT:
15304 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15305 return;
15307 case CONST_DOUBLE:
15308 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15309 return;
15311 case CONST_VECTOR:
15313 int i;
15315 fprintf (f, "<");
15316 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15318 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15319 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15320 fputc (',', f);
15322 fprintf (f, ">");
15324 return;
15326 case CONST_STRING:
15327 fprintf (f, "\"%s\"", XSTR (x, 0));
15328 return;
15330 case SYMBOL_REF:
15331 fprintf (f, "`%s'", XSTR (x, 0));
15332 return;
15334 case LABEL_REF:
15335 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15336 return;
15338 case CONST:
15339 arm_print_value (f, XEXP (x, 0));
15340 return;
15342 case PLUS:
15343 arm_print_value (f, XEXP (x, 0));
15344 fprintf (f, "+");
15345 arm_print_value (f, XEXP (x, 1));
15346 return;
15348 case PC:
15349 fprintf (f, "pc");
15350 return;
15352 default:
15353 fprintf (f, "????");
15354 return;
15358 /* Routines for manipulation of the constant pool. */
15360 /* Arm instructions cannot load a large constant directly into a
15361 register; they have to come from a pc relative load. The constant
15362 must therefore be placed in the addressable range of the pc
15363 relative load. Depending on the precise pc relative load
15364 instruction the range is somewhere between 256 bytes and 4k. This
15365 means that we often have to dump a constant inside a function, and
15366 generate code to branch around it.
15368 It is important to minimize this, since the branches will slow
15369 things down and make the code larger.
15371 Normally we can hide the table after an existing unconditional
15372 branch so that there is no interruption of the flow, but in the
15373 worst case the code looks like this:
15375 ldr rn, L1
15377 b L2
15378 align
15379 L1: .long value
15383 ldr rn, L3
15385 b L4
15386 align
15387 L3: .long value
15391 We fix this by performing a scan after scheduling, which notices
15392 which instructions need to have their operands fetched from the
15393 constant table and builds the table.
15395 The algorithm starts by building a table of all the constants that
15396 need fixing up and all the natural barriers in the function (places
15397 where a constant table can be dropped without breaking the flow).
15398 For each fixup we note how far the pc-relative replacement will be
15399 able to reach and the offset of the instruction into the function.
15401 Having built the table we then group the fixes together to form
15402 tables that are as large as possible (subject to addressing
15403 constraints) and emit each table of constants after the last
15404 barrier that is within range of all the instructions in the group.
15405 If a group does not contain a barrier, then we forcibly create one
15406 by inserting a jump instruction into the flow. Once the table has
15407 been inserted, the insns are then modified to reference the
15408 relevant entry in the pool.
15410 Possible enhancements to the algorithm (not implemented) are:
15412 1) For some processors and object formats, there may be benefit in
15413 aligning the pools to the start of cache lines; this alignment
15414 would need to be taken into account when calculating addressability
15415 of a pool. */
15417 /* These typedefs are located at the start of this file, so that
15418 they can be used in the prototypes there. This comment is to
15419 remind readers of that fact so that the following structures
15420 can be understood more easily.
15422 typedef struct minipool_node Mnode;
15423 typedef struct minipool_fixup Mfix; */
15425 struct minipool_node
15427 /* Doubly linked chain of entries. */
15428 Mnode * next;
15429 Mnode * prev;
15430 /* The maximum offset into the code at which this entry can be placed. While
15431 pushing fixes for forward references, all entries are sorted in order
15432 of increasing max_address. */
15433 HOST_WIDE_INT max_address;
15434 /* Similarly for an entry inserted for a backwards ref. */
15435 HOST_WIDE_INT min_address;
15436 /* The number of fixes referencing this entry. This can become zero
15437 if we "unpush" an entry. In this case we ignore the entry when we
15438 come to emit the code. */
15439 int refcount;
15440 /* The offset from the start of the minipool. */
15441 HOST_WIDE_INT offset;
15442 /* The value in the table. */
15443 rtx value;
15444 /* The mode of value. */
15445 enum machine_mode mode;
15446 /* The size of the value. With iWMMXt enabled
15447 sizes > 4 also imply an alignment of 8 bytes. */
15448 int fix_size;
15451 struct minipool_fixup
15453 Mfix * next;
15454 rtx insn;
15455 HOST_WIDE_INT address;
15456 rtx * loc;
15457 enum machine_mode mode;
15458 int fix_size;
15459 rtx value;
15460 Mnode * minipool;
15461 HOST_WIDE_INT forwards;
15462 HOST_WIDE_INT backwards;
15465 /* Fixes less than a word need padding out to a word boundary. */
15466 #define MINIPOOL_FIX_SIZE(mode) \
15467 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
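/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, whereas MINIPOOL_FIX_SIZE (DImode) is the natural
   size of 8; only sub-word values are padded.  */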
15469 static Mnode * minipool_vector_head;
15470 static Mnode * minipool_vector_tail;
15471 static rtx minipool_vector_label;
15472 static int minipool_pad;
15474 /* The linked list of all minipool fixes required for this function. */
15475 Mfix * minipool_fix_head;
15476 Mfix * minipool_fix_tail;
15477 /* The fix entry for the current minipool, once it has been placed. */
15478 Mfix * minipool_barrier;
15480 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15481 #define JUMP_TABLES_IN_TEXT_SECTION 0
15482 #endif
15484 static HOST_WIDE_INT
15485 get_jump_table_size (rtx insn)
15487 /* ADDR_VECs only take room if read-only data goes into the text
15488 section. */
15489 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15491 rtx body = PATTERN (insn);
15492 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15493 HOST_WIDE_INT size;
15494 HOST_WIDE_INT modesize;
15496 modesize = GET_MODE_SIZE (GET_MODE (body));
15497 size = modesize * XVECLEN (body, elt);
15498 switch (modesize)
15500 case 1:
15501 /* Round up size of TBB table to a halfword boundary. */
15502 size = (size + 1) & ~(HOST_WIDE_INT)1;
15503 break;
15504 case 2:
15505 /* No padding necessary for TBH. */
15506 break;
15507 case 4:
15508 /* Add two bytes for alignment on Thumb. */
15509 if (TARGET_THUMB)
15510 size += 2;
15511 break;
15512 default:
15513 gcc_unreachable ();
15515 return size;
15518 return 0;
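/* Worked example (entry counts invented): a TBB-style byte table with
   five entries is 5 bytes, rounded up to 6 for the halfword boundary;
   an SImode table with five entries in ARM mode is simply 20 bytes.  */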
15521 /* Return the maximum amount of padding that will be inserted before
15522 label LABEL. */
15524 static HOST_WIDE_INT
15525 get_label_padding (rtx label)
15527 HOST_WIDE_INT align, min_insn_size;
15529 align = 1 << label_to_alignment (label);
15530 min_insn_size = TARGET_THUMB ? 2 : 4;
15531 return align > min_insn_size ? align - min_insn_size : 0;
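/* For instance, a label aligned to 8 bytes (label_to_alignment == 3)
   can be preceded by at most 8 - 2 == 6 bytes of padding on Thumb, or
   8 - 4 == 4 bytes on ARM.  */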
15534 /* Move a minipool fix MP from its current location to before MAX_MP.
15535 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15536 constraints may need updating. */
15537 static Mnode *
15538 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15539 HOST_WIDE_INT max_address)
15541 /* The code below assumes these are different. */
15542 gcc_assert (mp != max_mp);
15544 if (max_mp == NULL)
15546 if (max_address < mp->max_address)
15547 mp->max_address = max_address;
15549 else
15551 if (max_address > max_mp->max_address - mp->fix_size)
15552 mp->max_address = max_mp->max_address - mp->fix_size;
15553 else
15554 mp->max_address = max_address;
15556 /* Unlink MP from its current position. Since max_mp is non-null,
15557 mp->prev must be non-null. */
15558 mp->prev->next = mp->next;
15559 if (mp->next != NULL)
15560 mp->next->prev = mp->prev;
15561 else
15562 minipool_vector_tail = mp->prev;
15564 /* Re-insert it before MAX_MP. */
15565 mp->next = max_mp;
15566 mp->prev = max_mp->prev;
15567 max_mp->prev = mp;
15569 if (mp->prev != NULL)
15570 mp->prev->next = mp;
15571 else
15572 minipool_vector_head = mp;
15575 /* Save the new entry. */
15576 max_mp = mp;
15578 /* Scan over the preceding entries and adjust their addresses as
15579 required. */
15580 while (mp->prev != NULL
15581 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15583 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15584 mp = mp->prev;
15587 return max_mp;
15590 /* Add a constant to the minipool for a forward reference. Returns the
15591 node added or NULL if the constant will not fit in this pool. */
15592 static Mnode *
15593 add_minipool_forward_ref (Mfix *fix)
15595 /* If set, max_mp is the first pool_entry that has a lower
15596 constraint than the one we are trying to add. */
15597 Mnode * max_mp = NULL;
15598 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15599 Mnode * mp;
15601 /* If the minipool starts before the end of FIX->INSN then this FIX
15602 cannot be placed into the current pool. Furthermore, adding the
15603 new constant pool entry may cause the pool to start FIX_SIZE bytes
15604 earlier. */
15605 if (minipool_vector_head &&
15606 (fix->address + get_attr_length (fix->insn)
15607 >= minipool_vector_head->max_address - fix->fix_size))
15608 return NULL;
15610 /* Scan the pool to see if a constant with the same value has
15611 already been added. While we are doing this, also note the
15612 location where we must insert the constant if it doesn't already
15613 exist. */
15614 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15616 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15617 && fix->mode == mp->mode
15618 && (!LABEL_P (fix->value)
15619 || (CODE_LABEL_NUMBER (fix->value)
15620 == CODE_LABEL_NUMBER (mp->value)))
15621 && rtx_equal_p (fix->value, mp->value))
15623 /* More than one fix references this entry. */
15624 mp->refcount++;
15625 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15628 /* Note the insertion point if necessary. */
15629 if (max_mp == NULL
15630 && mp->max_address > max_address)
15631 max_mp = mp;
15633 /* If we are inserting an 8-byte aligned quantity and
15634 we have not already found an insertion point, then
15635 make sure that all such 8-byte aligned quantities are
15636 placed at the start of the pool. */
15637 if (ARM_DOUBLEWORD_ALIGN
15638 && max_mp == NULL
15639 && fix->fix_size >= 8
15640 && mp->fix_size < 8)
15642 max_mp = mp;
15643 max_address = mp->max_address;
15647 /* The value is not currently in the minipool, so we need to create
15648 a new entry for it. If MAX_MP is NULL, the entry will be put on
15649 the end of the list since the placement is less constrained than
15650 any existing entry. Otherwise, we insert the new fix before
15651 MAX_MP and, if necessary, adjust the constraints on the other
15652 entries. */
15653 mp = XNEW (Mnode);
15654 mp->fix_size = fix->fix_size;
15655 mp->mode = fix->mode;
15656 mp->value = fix->value;
15657 mp->refcount = 1;
15658 /* Not yet required for a backwards ref. */
15659 mp->min_address = -65536;
15661 if (max_mp == NULL)
15663 mp->max_address = max_address;
15664 mp->next = NULL;
15665 mp->prev = minipool_vector_tail;
15667 if (mp->prev == NULL)
15669 minipool_vector_head = mp;
15670 minipool_vector_label = gen_label_rtx ();
15672 else
15673 mp->prev->next = mp;
15675 minipool_vector_tail = mp;
15677 else
15679 if (max_address > max_mp->max_address - mp->fix_size)
15680 mp->max_address = max_mp->max_address - mp->fix_size;
15681 else
15682 mp->max_address = max_address;
15684 mp->next = max_mp;
15685 mp->prev = max_mp->prev;
15686 max_mp->prev = mp;
15687 if (mp->prev != NULL)
15688 mp->prev->next = mp;
15689 else
15690 minipool_vector_head = mp;
15693 /* Save the new entry. */
15694 max_mp = mp;
15696 /* Scan over the preceding entries and adjust their addresses as
15697 required. */
15698 while (mp->prev != NULL
15699 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15701 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15702 mp = mp->prev;
15705 return max_mp;
15708 static Mnode *
15709 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15710 HOST_WIDE_INT min_address)
15712 HOST_WIDE_INT offset;
15714 /* The code below assumes these are different. */
15715 gcc_assert (mp != min_mp);
15717 if (min_mp == NULL)
15719 if (min_address > mp->min_address)
15720 mp->min_address = min_address;
15722 else
15724 /* We will adjust this below if it is too loose. */
15725 mp->min_address = min_address;
15727 /* Unlink MP from its current position. Since min_mp is non-null,
15728 mp->next must be non-null. */
15729 mp->next->prev = mp->prev;
15730 if (mp->prev != NULL)
15731 mp->prev->next = mp->next;
15732 else
15733 minipool_vector_head = mp->next;
15735 /* Reinsert it after MIN_MP. */
15736 mp->prev = min_mp;
15737 mp->next = min_mp->next;
15738 min_mp->next = mp;
15739 if (mp->next != NULL)
15740 mp->next->prev = mp;
15741 else
15742 minipool_vector_tail = mp;
15745 min_mp = mp;
15747 offset = 0;
15748 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15750 mp->offset = offset;
15751 if (mp->refcount > 0)
15752 offset += mp->fix_size;
15754 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15755 mp->next->min_address = mp->min_address + mp->fix_size;
15758 return min_mp;
15761 /* Add a constant to the minipool for a backward reference. Returns the
15762 node added or NULL if the constant will not fit in this pool.
15764 Note that the insertion code for a backwards reference can be
15765 somewhat confusing because the calculated offsets for each fix do
15766 not take into account the size of the pool (which is still under
15767 construction). */
15768 static Mnode *
15769 add_minipool_backward_ref (Mfix *fix)
15771 /* If set, min_mp is the last pool_entry that has a lower constraint
15772 than the one we are trying to add. */
15773 Mnode *min_mp = NULL;
15774 /* This can be negative, since it is only a constraint. */
15775 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15776 Mnode *mp;
15778 /* If we can't reach the current pool from this insn, or if we can't
15779 insert this entry at the end of the pool without pushing other
15780 fixes out of range, then we don't try. This ensures that we
15781 can't fail later on. */
15782 if (min_address >= minipool_barrier->address
15783 || (minipool_vector_tail->min_address + fix->fix_size
15784 >= minipool_barrier->address))
15785 return NULL;
15787 /* Scan the pool to see if a constant with the same value has
15788 already been added. While we are doing this, also note the
15789 location where we must insert the constant if it doesn't already
15790 exist. */
15791 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15793 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15794 && fix->mode == mp->mode
15795 && (!LABEL_P (fix->value)
15796 || (CODE_LABEL_NUMBER (fix->value)
15797 == CODE_LABEL_NUMBER (mp->value)))
15798 && rtx_equal_p (fix->value, mp->value)
15799 /* Check that there is enough slack to move this entry to the
15800 end of the table (this is conservative). */
15801 && (mp->max_address
15802 > (minipool_barrier->address
15803 + minipool_vector_tail->offset
15804 + minipool_vector_tail->fix_size)))
15806 mp->refcount++;
15807 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15810 if (min_mp != NULL)
15811 mp->min_address += fix->fix_size;
15812 else
15814 /* Note the insertion point if necessary. */
15815 if (mp->min_address < min_address)
15817 /* For now, we do not allow the insertion of nodes requiring 8-byte
15818 alignment anywhere but at the start of the pool. */
15819 if (ARM_DOUBLEWORD_ALIGN
15820 && fix->fix_size >= 8 && mp->fix_size < 8)
15821 return NULL;
15822 else
15823 min_mp = mp;
15825 else if (mp->max_address
15826 < minipool_barrier->address + mp->offset + fix->fix_size)
15828 /* Inserting before this entry would push the fix beyond
15829 its maximum address (which can happen if we have
15830 re-located a forwards fix); force the new fix to come
15831 after it. */
15832 if (ARM_DOUBLEWORD_ALIGN
15833 && fix->fix_size >= 8 && mp->fix_size < 8)
15834 return NULL;
15835 else
15837 min_mp = mp;
15838 min_address = mp->min_address + fix->fix_size;
15841 /* Do not insert a non-8-byte aligned quantity before 8-byte
15842 aligned quantities. */
15843 else if (ARM_DOUBLEWORD_ALIGN
15844 && fix->fix_size < 8
15845 && mp->fix_size >= 8)
15847 min_mp = mp;
15848 min_address = mp->min_address + fix->fix_size;
15853 /* We need to create a new entry. */
15854 mp = XNEW (Mnode);
15855 mp->fix_size = fix->fix_size;
15856 mp->mode = fix->mode;
15857 mp->value = fix->value;
15858 mp->refcount = 1;
15859 mp->max_address = minipool_barrier->address + 65536;
15861 mp->min_address = min_address;
15863 if (min_mp == NULL)
15865 mp->prev = NULL;
15866 mp->next = minipool_vector_head;
15868 if (mp->next == NULL)
15870 minipool_vector_tail = mp;
15871 minipool_vector_label = gen_label_rtx ();
15873 else
15874 mp->next->prev = mp;
15876 minipool_vector_head = mp;
15878 else
15880 mp->next = min_mp->next;
15881 mp->prev = min_mp;
15882 min_mp->next = mp;
15884 if (mp->next != NULL)
15885 mp->next->prev = mp;
15886 else
15887 minipool_vector_tail = mp;
15890 /* Save the new entry. */
15891 min_mp = mp;
15893 if (mp->prev)
15894 mp = mp->prev;
15895 else
15896 mp->offset = 0;
15898 /* Scan over the following entries and adjust their offsets. */
15899 while (mp->next != NULL)
15901 if (mp->next->min_address < mp->min_address + mp->fix_size)
15902 mp->next->min_address = mp->min_address + mp->fix_size;
15904 if (mp->refcount)
15905 mp->next->offset = mp->offset + mp->fix_size;
15906 else
15907 mp->next->offset = mp->offset;
15909 mp = mp->next;
15912 return min_mp;
15915 static void
15916 assign_minipool_offsets (Mfix *barrier)
15918 HOST_WIDE_INT offset = 0;
15919 Mnode *mp;
15921 minipool_barrier = barrier;
15923 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15925 mp->offset = offset;
15927 if (mp->refcount > 0)
15928 offset += mp->fix_size;
15932 /* Output the literal table. */
15933 static void
15934 dump_minipool (rtx scan)
15936 Mnode * mp;
15937 Mnode * nmp;
15938 int align64 = 0;
15940 if (ARM_DOUBLEWORD_ALIGN)
15941 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15942 if (mp->refcount > 0 && mp->fix_size >= 8)
15944 align64 = 1;
15945 break;
15948 if (dump_file)
15949 fprintf (dump_file,
15950 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15951 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
15953 scan = emit_label_after (gen_label_rtx (), scan);
15954 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
15955 scan = emit_label_after (minipool_vector_label, scan);
15957 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
15959 if (mp->refcount > 0)
15961 if (dump_file)
15963 fprintf (dump_file,
15964 ";; Offset %u, min %ld, max %ld ",
15965 (unsigned) mp->offset, (unsigned long) mp->min_address,
15966 (unsigned long) mp->max_address);
15967 arm_print_value (dump_file, mp->value);
15968 fputc ('\n', dump_file);
15971 switch (mp->fix_size)
15973 #ifdef HAVE_consttable_1
15974 case 1:
15975 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
15976 break;
15978 #endif
15979 #ifdef HAVE_consttable_2
15980 case 2:
15981 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
15982 break;
15984 #endif
15985 #ifdef HAVE_consttable_4
15986 case 4:
15987 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
15988 break;
15990 #endif
15991 #ifdef HAVE_consttable_8
15992 case 8:
15993 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
15994 break;
15996 #endif
15997 #ifdef HAVE_consttable_16
15998 case 16:
15999 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16000 break;
16002 #endif
16003 default:
16004 gcc_unreachable ();
16008 nmp = mp->next;
16009 free (mp);
16012 minipool_vector_head = minipool_vector_tail = NULL;
16013 scan = emit_insn_after (gen_consttable_end (), scan);
16014 scan = emit_barrier_after (scan);
16017 /* Return the cost of forcibly inserting a barrier after INSN. */
16018 static int
16019 arm_barrier_cost (rtx insn)
16021 /* Basing the location of the pool on the loop depth is preferable,
16022 but at the moment, the basic block information seems to be
16023 corrupted by this stage of the compilation. */
16024 int base_cost = 50;
16025 rtx next = next_nonnote_insn (insn);
16027 if (next != NULL && LABEL_P (next))
16028 base_cost -= 20;
16030 switch (GET_CODE (insn))
16032 case CODE_LABEL:
16033 /* It will always be better to place the table before the label, rather
16034 than after it. */
16035 return 50;
16037 case INSN:
16038 case CALL_INSN:
16039 return base_cost;
16041 case JUMP_INSN:
16042 return base_cost - 10;
16044 default:
16045 return base_cost + 10;
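/* For example, a JUMP_INSN that is immediately followed by a CODE_LABEL
   costs 50 - 20 - 10 == 20 and is therefore a preferred place to force
   a barrier, while placing it on the CODE_LABEL itself always costs 50.  */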
16049 /* Find the best place in the insn stream in the range
16050 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16051 Create the barrier by inserting a jump and add a new fix entry for
16052 it. */
16053 static Mfix *
16054 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16056 HOST_WIDE_INT count = 0;
16057 rtx barrier;
16058 rtx from = fix->insn;
16059 /* The instruction after which we will insert the jump. */
16060 rtx selected = NULL;
16061 int selected_cost;
16062 /* The address at which the jump instruction will be placed. */
16063 HOST_WIDE_INT selected_address;
16064 Mfix * new_fix;
16065 HOST_WIDE_INT max_count = max_address - fix->address;
16066 rtx label = gen_label_rtx ();
16068 selected_cost = arm_barrier_cost (from);
16069 selected_address = fix->address;
16071 while (from && count < max_count)
16073 rtx tmp;
16074 int new_cost;
16076 /* This code shouldn't have been called if there was a natural barrier
16077 within range. */
16078 gcc_assert (!BARRIER_P (from));
16080 /* Count the length of this insn. This must stay in sync with the
16081 code that pushes minipool fixes. */
16082 if (LABEL_P (from))
16083 count += get_label_padding (from);
16084 else
16085 count += get_attr_length (from);
16087 /* If there is a jump table, add its length. */
16088 if (tablejump_p (from, NULL, &tmp))
16090 count += get_jump_table_size (tmp);
16092 /* Jump tables aren't in a basic block, so base the cost on
16093 the dispatch insn. If we select this location, we will
16094 still put the pool after the table. */
16095 new_cost = arm_barrier_cost (from);
16097 if (count < max_count
16098 && (!selected || new_cost <= selected_cost))
16100 selected = tmp;
16101 selected_cost = new_cost;
16102 selected_address = fix->address + count;
16105 /* Continue after the dispatch table. */
16106 from = NEXT_INSN (tmp);
16107 continue;
16110 new_cost = arm_barrier_cost (from);
16112 if (count < max_count
16113 && (!selected || new_cost <= selected_cost))
16115 selected = from;
16116 selected_cost = new_cost;
16117 selected_address = fix->address + count;
16120 from = NEXT_INSN (from);
16123 /* Make sure that we found a place to insert the jump. */
16124 gcc_assert (selected);
16126 /* Make sure we do not split a call and its corresponding
16127 CALL_ARG_LOCATION note. */
16128 if (CALL_P (selected))
16130 rtx next = NEXT_INSN (selected);
16131 if (next && NOTE_P (next)
16132 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16133 selected = next;
16136 /* Create a new JUMP_INSN that branches around a barrier. */
16137 from = emit_jump_insn_after (gen_jump (label), selected);
16138 JUMP_LABEL (from) = label;
16139 barrier = emit_barrier_after (from);
16140 emit_label_after (label, barrier);
16142 /* Create a minipool barrier entry for the new barrier. */
16143 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16144 new_fix->insn = barrier;
16145 new_fix->address = selected_address;
16146 new_fix->next = fix->next;
16147 fix->next = new_fix;
16149 return new_fix;
16152 /* Record that there is a natural barrier in the insn stream at
16153 ADDRESS. */
16154 static void
16155 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16157 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16159 fix->insn = insn;
16160 fix->address = address;
16162 fix->next = NULL;
16163 if (minipool_fix_head != NULL)
16164 minipool_fix_tail->next = fix;
16165 else
16166 minipool_fix_head = fix;
16168 minipool_fix_tail = fix;
16171 /* Record INSN, which will need fixing up to load a value from the
16172 minipool. ADDRESS is the offset of the insn from the start of the
16173 function; LOC is a pointer to the part of the insn which requires
16174 fixing; VALUE is the constant that must be loaded, which is of type
16175 MODE. */
16176 static void
16177 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16178 enum machine_mode mode, rtx value)
16180 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16182 fix->insn = insn;
16183 fix->address = address;
16184 fix->loc = loc;
16185 fix->mode = mode;
16186 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16187 fix->value = value;
16188 fix->forwards = get_attr_pool_range (insn);
16189 fix->backwards = get_attr_neg_pool_range (insn);
16190 fix->minipool = NULL;
16192 /* If an insn doesn't have a range defined for it, then it isn't
16193 expecting to be reworked by this code. Better to stop now than
16194 to generate duff assembly code. */
16195 gcc_assert (fix->forwards || fix->backwards);
16197 /* If an entry requires 8-byte alignment then assume all constant pools
16198 require 4 bytes of padding. Trying to do this later on a per-pool
16199 basis is awkward because existing pool entries have to be modified. */
16200 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16201 minipool_pad = 4;
16203 if (dump_file)
16205 fprintf (dump_file,
16206 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16207 GET_MODE_NAME (mode),
16208 INSN_UID (insn), (unsigned long) address,
16209 -1 * (long)fix->backwards, (long)fix->forwards);
16210 arm_print_value (dump_file, fix->value);
16211 fprintf (dump_file, "\n");
16214 /* Add it to the chain of fixes. */
16215 fix->next = NULL;
16217 if (minipool_fix_head != NULL)
16218 minipool_fix_tail->next = fix;
16219 else
16220 minipool_fix_head = fix;
16222 minipool_fix_tail = fix;
16225 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16226 Returns the number of insns needed, or 99 if we don't know how to
16227 do it. */
16228 int
16229 arm_const_double_inline_cost (rtx val)
16231 rtx lowpart, highpart;
16232 enum machine_mode mode;
16234 mode = GET_MODE (val);
16236 if (mode == VOIDmode)
16237 mode = DImode;
16239 gcc_assert (GET_MODE_SIZE (mode) == 8);
16241 lowpart = gen_lowpart (SImode, val);
16242 highpart = gen_highpart_mode (SImode, mode, val);
16244 gcc_assert (CONST_INT_P (lowpart));
16245 gcc_assert (CONST_INT_P (highpart));
16247 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16248 NULL_RTX, NULL_RTX, 0, 0)
16249 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16250 NULL_RTX, NULL_RTX, 0, 0));
16253 /* Return true if it is worthwhile to split a 64-bit constant into two
16254 32-bit operations. This is the case if optimizing for size, or
16255 if we have load delay slots, or if one 32-bit part can be done with
16256 a single data operation. */
16257 bool
16258 arm_const_double_by_parts (rtx val)
16260 enum machine_mode mode = GET_MODE (val);
16261 rtx part;
16263 if (optimize_size || arm_ld_sched)
16264 return true;
16266 if (mode == VOIDmode)
16267 mode = DImode;
16269 part = gen_highpart_mode (SImode, mode, val);
16271 gcc_assert (CONST_INT_P (part));
16273 if (const_ok_for_arm (INTVAL (part))
16274 || const_ok_for_arm (~INTVAL (part)))
16275 return true;
16277 part = gen_lowpart (SImode, val);
16279 gcc_assert (CONST_INT_P (part));
16281 if (const_ok_for_arm (INTVAL (part))
16282 || const_ok_for_arm (~INTVAL (part)))
16283 return true;
16285 return false;
16288 /* Return true if it is possible to inline both the high and low parts
16289 of a 64-bit constant into 32-bit data processing instructions. */
16290 bool
16291 arm_const_double_by_immediates (rtx val)
16293 enum machine_mode mode = GET_MODE (val);
16294 rtx part;
16296 if (mode == VOIDmode)
16297 mode = DImode;
16299 part = gen_highpart_mode (SImode, mode, val);
16301 gcc_assert (CONST_INT_P (part));
16303 if (!const_ok_for_arm (INTVAL (part)))
16304 return false;
16306 part = gen_lowpart (SImode, val);
16308 gcc_assert (CONST_INT_P (part));
16310 if (!const_ok_for_arm (INTVAL (part)))
16311 return false;
16313 return true;
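/* Example (values chosen for illustration): 0x000000ff00000001 splits
   into the valid ARM immediates 0xff and 0x1, so this returns true,
   whereas 0x123456789abcdef0 returns false because neither half is a
   valid immediate.  */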
16316 /* Scan INSN and note any of its operands that need fixing.
16317 If DO_PUSHES is false we do not actually push any of the fixups
16318 needed. */
16319 static void
16320 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16322 int opno;
16324 extract_insn (insn);
16326 if (!constrain_operands (1))
16327 fatal_insn_not_found (insn);
16329 if (recog_data.n_alternatives == 0)
16330 return;
16332 /* Fill in recog_op_alt with information about the constraints of
16333 this insn. */
16334 preprocess_constraints ();
16336 for (opno = 0; opno < recog_data.n_operands; opno++)
16338 /* Things we need to fix can only occur in inputs. */
16339 if (recog_data.operand_type[opno] != OP_IN)
16340 continue;
16342 /* If this alternative is a memory reference, then any mention
16343 of constants in this alternative is really to fool reload
16344 into allowing us to accept one there. We need to fix them up
16345 now so that we output the right code. */
16346 if (recog_op_alt[opno][which_alternative].memory_ok)
16348 rtx op = recog_data.operand[opno];
16350 if (CONSTANT_P (op))
16352 if (do_pushes)
16353 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16354 recog_data.operand_mode[opno], op);
16356 else if (MEM_P (op)
16357 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16358 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16360 if (do_pushes)
16362 rtx cop = avoid_constant_pool_reference (op);
16364 /* Casting the address of something to a mode narrower
16365 than a word can cause avoid_constant_pool_reference()
16366 to return the pool reference itself. That's no good to
16367 us here. Let's just hope that we can use the
16368 constant pool value directly. */
16369 if (op == cop)
16370 cop = get_pool_constant (XEXP (op, 0));
16372 push_minipool_fix (insn, address,
16373 recog_data.operand_loc[opno],
16374 recog_data.operand_mode[opno], cop);
16381 return;
16384 /* Rewrite move insn into subtract of 0 if the condition codes will
16385 be useful in the next conditional jump insn. */
16387 static void
16388 thumb1_reorg (void)
16390 basic_block bb;
16392 FOR_EACH_BB (bb)
16394 rtx dest, src;
16395 rtx pat, op0, set = NULL;
16396 rtx prev, insn = BB_END (bb);
16397 bool insn_clobbered = false;
16399 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16400 insn = PREV_INSN (insn);
16402 /* Find the last cbranchsi4_insn in basic block BB. */
16403 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16404 continue;
16406 /* Get the register with which we are comparing. */
16407 pat = PATTERN (insn);
16408 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16410 /* Find the first flag setting insn before INSN in basic block BB. */
16411 gcc_assert (insn != BB_HEAD (bb));
16412 for (prev = PREV_INSN (insn);
16413 (!insn_clobbered
16414 && prev != BB_HEAD (bb)
16415 && (NOTE_P (prev)
16416 || DEBUG_INSN_P (prev)
16417 || ((set = single_set (prev)) != NULL
16418 && get_attr_conds (prev) == CONDS_NOCOND)));
16419 prev = PREV_INSN (prev))
16421 if (reg_set_p (op0, prev))
16422 insn_clobbered = true;
16425 /* Skip if op0 is clobbered by insn other than prev. */
16426 if (insn_clobbered)
16427 continue;
16429 if (!set)
16430 continue;
16432 dest = SET_DEST (set);
16433 src = SET_SRC (set);
16434 if (!low_register_operand (dest, SImode)
16435 || !low_register_operand (src, SImode))
16436 continue;
16438 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16439 in INSN. Both src and dest of the move insn are checked. */
16440 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16442 dest = copy_rtx (dest);
16443 src = copy_rtx (src);
16444 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16445 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16446 INSN_CODE (prev) = -1;
16447 /* Set test register in INSN to dest. */
16448 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16449 INSN_CODE (insn) = -1;
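/* Sketch of the intended effect (register numbers invented): a sequence
   like "mov r1, r0" followed by a cbranchsi4_insn comparing r1 with zero
   can have the move rewritten as "subs r1, r0, #0"; the subtract sets the
   flags itself, so the later compare against zero becomes redundant.  */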
16454 /* Convert instructions to their cc-clobbering variant if possible, since
16455 that allows us to use smaller encodings. */
16457 static void
16458 thumb2_reorg (void)
16460 basic_block bb;
16461 regset_head live;
16463 INIT_REG_SET (&live);
16465 /* We are freeing block_for_insn in the toplev to keep compatibility
16466 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16467 compute_bb_for_insn ();
16468 df_analyze ();
16470 FOR_EACH_BB (bb)
16472 rtx insn;
16474 COPY_REG_SET (&live, DF_LR_OUT (bb));
16475 df_simulate_initialize_backwards (bb, &live);
16476 FOR_BB_INSNS_REVERSE (bb, insn)
16478 if (NONJUMP_INSN_P (insn)
16479 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16480 && GET_CODE (PATTERN (insn)) == SET)
16482 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16483 rtx pat = PATTERN (insn);
16484 rtx dst = XEXP (pat, 0);
16485 rtx src = XEXP (pat, 1);
16486 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16488 if (!OBJECT_P (src))
16489 op0 = XEXP (src, 0);
16491 if (BINARY_P (src))
16492 op1 = XEXP (src, 1);
16494 if (low_register_operand (dst, SImode))
16496 switch (GET_CODE (src))
16498 case PLUS:
16499 /* Adding two registers and storing the result
16500 in the first source is already a 16-bit
16501 operation. */
16502 if (rtx_equal_p (dst, op0)
16503 && register_operand (op1, SImode))
16504 break;
16506 if (low_register_operand (op0, SImode))
16508 /* ADDS <Rd>,<Rn>,<Rm> */
16509 if (low_register_operand (op1, SImode))
16510 action = CONV;
16511 /* ADDS <Rdn>,#<imm8> */
16512 /* SUBS <Rdn>,#<imm8> */
16513 else if (rtx_equal_p (dst, op0)
16514 && CONST_INT_P (op1)
16515 && IN_RANGE (INTVAL (op1), -255, 255))
16516 action = CONV;
16517 /* ADDS <Rd>,<Rn>,#<imm3> */
16518 /* SUBS <Rd>,<Rn>,#<imm3> */
16519 else if (CONST_INT_P (op1)
16520 && IN_RANGE (INTVAL (op1), -7, 7))
16521 action = CONV;
16523 /* ADCS <Rd>, <Rn> */
16524 else if (GET_CODE (XEXP (src, 0)) == PLUS
16525 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16526 && low_register_operand (XEXP (XEXP (src, 0), 1),
16527 SImode)
16528 && COMPARISON_P (op1)
16529 && cc_register (XEXP (op1, 0), VOIDmode)
16530 && maybe_get_arm_condition_code (op1) == ARM_CS
16531 && XEXP (op1, 1) == const0_rtx)
16532 action = CONV;
16533 break;
16535 case MINUS:
16536 /* RSBS <Rd>,<Rn>,#0
16537 Not handled here: see NEG below. */
16538 /* SUBS <Rd>,<Rn>,#<imm3>
16539 SUBS <Rdn>,#<imm8>
16540 Not handled here: see PLUS above. */
16541 /* SUBS <Rd>,<Rn>,<Rm> */
16542 if (low_register_operand (op0, SImode)
16543 && low_register_operand (op1, SImode))
16544 action = CONV;
16545 break;
16547 case MULT:
16548 /* MULS <Rdm>,<Rn>,<Rdm>
16549 As an exception to the rule, this is only used
16550 when optimizing for size since MULS is slow on all
16551 known implementations. We do not even want to use
16552 MULS in cold code, if optimizing for speed, so we
16553 test the global flag here. */
16554 if (!optimize_size)
16555 break;
16556 /* else fall through. */
16557 case AND:
16558 case IOR:
16559 case XOR:
16560 /* ANDS <Rdn>,<Rm> */
16561 if (rtx_equal_p (dst, op0)
16562 && low_register_operand (op1, SImode))
16563 action = CONV;
16564 else if (rtx_equal_p (dst, op1)
16565 && low_register_operand (op0, SImode))
16566 action = SWAP_CONV;
16567 break;
16569 case ASHIFTRT:
16570 case ASHIFT:
16571 case LSHIFTRT:
16572 /* ASRS <Rdn>,<Rm> */
16573 /* LSRS <Rdn>,<Rm> */
16574 /* LSLS <Rdn>,<Rm> */
16575 if (rtx_equal_p (dst, op0)
16576 && low_register_operand (op1, SImode))
16577 action = CONV;
16578 /* ASRS <Rd>,<Rm>,#<imm5> */
16579 /* LSRS <Rd>,<Rm>,#<imm5> */
16580 /* LSLS <Rd>,<Rm>,#<imm5> */
16581 else if (low_register_operand (op0, SImode)
16582 && CONST_INT_P (op1)
16583 && IN_RANGE (INTVAL (op1), 0, 31))
16584 action = CONV;
16585 break;
16587 case ROTATERT:
16588 /* RORS <Rdn>,<Rm> */
16589 if (rtx_equal_p (dst, op0)
16590 && low_register_operand (op1, SImode))
16591 action = CONV;
16592 break;
16594 case NOT:
16595 case NEG:
16596 /* MVNS <Rd>,<Rm> */
16597 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16598 if (low_register_operand (op0, SImode))
16599 action = CONV;
16600 break;
16602 case CONST_INT:
16603 /* MOVS <Rd>,#<imm8> */
16604 if (CONST_INT_P (src)
16605 && IN_RANGE (INTVAL (src), 0, 255))
16606 action = CONV;
16607 break;
16609 case REG:
16610 /* MOVS and MOV<c> with registers have different
16611 encodings, so are not relevant here. */
16612 break;
16614 default:
16615 break;
16619 if (action != SKIP)
16621 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16622 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16623 rtvec vec;
16625 if (action == SWAP_CONV)
16627 src = copy_rtx (src);
16628 XEXP (src, 0) = op1;
16629 XEXP (src, 1) = op0;
16630 pat = gen_rtx_SET (VOIDmode, dst, src);
16631 vec = gen_rtvec (2, pat, clobber);
16633 else /* action == CONV */
16634 vec = gen_rtvec (2, pat, clobber);
16636 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16637 INSN_CODE (insn) = -1;
16641 if (NONDEBUG_INSN_P (insn))
16642 df_simulate_one_insn_backwards (bb, insn, &live);
16646 CLEAR_REG_SET (&live);
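/* For example, "add r0, r1, r2" becomes the flag-setting "adds r0, r1, r2":
   outside an IT block the flag-setting three-register form has a 16-bit
   encoding, while the non-flag-setting form needs the 32-bit add.w.  The
   conversion is only done while the condition codes are dead, as tracked
   by the backwards df simulation above.  */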
16649 /* GCC puts the pool in the wrong place for ARM, since we can only
16650 load addresses a limited distance around the pc. We do some
16651 special munging to move the constant pool values to the correct
16652 point in the code. */
16653 static void
16654 arm_reorg (void)
16656 rtx insn;
16657 HOST_WIDE_INT address = 0;
16658 Mfix * fix;
16660 if (TARGET_THUMB1)
16661 thumb1_reorg ();
16662 else if (TARGET_THUMB2)
16663 thumb2_reorg ();
16665 /* Ensure all insns that must be split have been split at this point.
16666 Otherwise, the pool placement code below may compute incorrect
16667 insn lengths. Note that when optimizing, all insns have already
16668 been split at this point. */
16669 if (!optimize)
16670 split_all_insns_noflow ();
16672 minipool_fix_head = minipool_fix_tail = NULL;
16674 /* The first insn must always be a note, or the code below won't
16675 scan it properly. */
16676 insn = get_insns ();
16677 gcc_assert (NOTE_P (insn));
16678 minipool_pad = 0;
16680 /* Scan all the insns and record the operands that will need fixing. */
16681 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16683 if (BARRIER_P (insn))
16684 push_minipool_barrier (insn, address);
16685 else if (INSN_P (insn))
16687 rtx table;
16689 note_invalid_constants (insn, address, true);
16690 address += get_attr_length (insn);
16692 /* If the insn is a vector jump, add the size of the table
16693 and skip the table. */
16694 if (tablejump_p (insn, NULL, &table))
16696 address += get_jump_table_size (table);
16697 insn = table;
16700 else if (LABEL_P (insn))
16701 /* Add the worst-case padding due to alignment. We don't add
16702 the _current_ padding because the minipool insertions
16703 themselves might change it. */
16704 address += get_label_padding (insn);
16707 fix = minipool_fix_head;
16709 /* Now scan the fixups and perform the required changes. */
16710 while (fix)
16712 Mfix * ftmp;
16713 Mfix * fdel;
16714 Mfix * last_added_fix;
16715 Mfix * last_barrier = NULL;
16716 Mfix * this_fix;
16718 /* Skip any further barriers before the next fix. */
16719 while (fix && BARRIER_P (fix->insn))
16720 fix = fix->next;
16722 /* No more fixes. */
16723 if (fix == NULL)
16724 break;
16726 last_added_fix = NULL;
16728 for (ftmp = fix; ftmp; ftmp = ftmp->next)
16730 if (BARRIER_P (ftmp->insn))
16732 if (ftmp->address >= minipool_vector_head->max_address)
16733 break;
16735 last_barrier = ftmp;
16737 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
16738 break;
16740 last_added_fix = ftmp; /* Keep track of the last fix added. */
16743 /* If we found a barrier, drop back to that; any fixes that we
16744 could have reached but come after the barrier will now go in
16745 the next mini-pool. */
16746 if (last_barrier != NULL)
16748 /* Reduce the refcount for those fixes that won't go into this
16749 pool after all. */
16750 for (fdel = last_barrier->next;
16751 fdel && fdel != ftmp;
16752 fdel = fdel->next)
16754 fdel->minipool->refcount--;
16755 fdel->minipool = NULL;
16758 ftmp = last_barrier;
16760 else
16762 /* ftmp is the first fix that we can't fit into this pool and
16763 there are no natural barriers that we could use. Insert a
16764 new barrier in the code somewhere between the previous
16765 fix and this one, and arrange to jump around it. */
16766 HOST_WIDE_INT max_address;
16768 /* The last item on the list of fixes must be a barrier, so
16769 we can never run off the end of the list of fixes without
16770 last_barrier being set. */
16771 gcc_assert (ftmp);
16773 max_address = minipool_vector_head->max_address;
16774 /* Check that there isn't another fix that is in range that
16775 we couldn't fit into this pool because the pool was
16776 already too large: we need to put the pool before such an
16777 instruction. The pool itself may come just after the
16778 fix because create_fix_barrier also allows space for a
16779 jump instruction. */
16780 if (ftmp->address < max_address)
16781 max_address = ftmp->address + 1;
16783 last_barrier = create_fix_barrier (last_added_fix, max_address);
16786 assign_minipool_offsets (last_barrier);
16788 while (ftmp)
16790 if (!BARRIER_P (ftmp->insn)
16791 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
16792 == NULL))
16793 break;
16795 ftmp = ftmp->next;
16798 /* Scan over the fixes we have identified for this pool, fixing them
16799 up and adding the constants to the pool itself. */
16800 for (this_fix = fix; this_fix && ftmp != this_fix;
16801 this_fix = this_fix->next)
16802 if (!BARRIER_P (this_fix->insn))
16804 rtx addr
16805 = plus_constant (Pmode,
16806 gen_rtx_LABEL_REF (VOIDmode,
16807 minipool_vector_label),
16808 this_fix->minipool->offset);
16809 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
16812 dump_minipool (last_barrier->insn);
16813 fix = ftmp;
16816 /* From now on we must synthesize any constants that we can't handle
16817 directly. This can happen if the RTL gets split during final
16818 instruction generation. */
16819 after_arm_reorg = 1;
16821 /* Free the minipool memory. */
16822 obstack_free (&minipool_obstack, minipool_startobj);
16825 /* Routines to output assembly language. */
16827 /* If the rtx is the correct value then return the string of the number.
16828 In this way we can ensure that valid double constants are generated even
16829 when cross compiling. */
16830 const char *
16831 fp_immediate_constant (rtx x)
16833 REAL_VALUE_TYPE r;
16835 if (!fp_consts_inited)
16836 init_fp_table ();
16838 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16840 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
16841 return "0";
16844 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
16845 static const char *
16846 fp_const_from_val (REAL_VALUE_TYPE *r)
16848 if (!fp_consts_inited)
16849 init_fp_table ();
16851 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
16852 return "0";
16855 /* OPERANDS[0] is the entire list of insns that constitute pop,
16856 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16857 is in the list, UPDATE is true iff the list contains an explicit
16858 update of the base register. */
16859 void
16860 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
16861 bool update)
16863 int i;
16864 char pattern[100];
16865 int offset;
16866 const char *conditional;
16867 int num_saves = XVECLEN (operands[0], 0);
16868 unsigned int regno;
16869 unsigned int regno_base = REGNO (operands[1]);
16871 offset = 0;
16872 offset += update ? 1 : 0;
16873 offset += return_pc ? 1 : 0;
16875 /* Is the base register in the list? */
16876 for (i = offset; i < num_saves; i++)
16878 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
16879 /* If SP is in the list, then the base register must be SP. */
16880 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
16881 /* If base register is in the list, there must be no explicit update. */
16882 if (regno == regno_base)
16883 gcc_assert (!update);
16886 conditional = reverse ? "%?%D0" : "%?%d0";
16887 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
16889 /* Output pop (not ldmfd) because it has a shorter encoding. */
16890 gcc_assert (update);
16891 sprintf (pattern, "pop%s\t{", conditional);
16893 else
16895 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16896 It's just a convention; their semantics are identical. */
16897 if (regno_base == SP_REGNUM)
16898 sprintf (pattern, "ldm%sfd\t", conditional);
16899 else if (TARGET_UNIFIED_ASM)
16900 sprintf (pattern, "ldmia%s\t", conditional);
16901 else
16902 sprintf (pattern, "ldm%sia\t", conditional);
16904 strcat (pattern, reg_names[regno_base]);
16905 if (update)
16906 strcat (pattern, "!, {");
16907 else
16908 strcat (pattern, ", {");
16911 /* Output the first destination register. */
16912 strcat (pattern,
16913 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
16915 /* Output the rest of the destination registers. */
16916 for (i = offset + 1; i < num_saves; i++)
16918 strcat (pattern, ", ");
16919 strcat (pattern,
16920 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
16923 strcat (pattern, "}");
16925 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
16926 strcat (pattern, "^");
16928 output_asm_insn (pattern, &cond);
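/* E.g. popping {r4, r5, pc} with SP as the base on a unified-asm target
   prints "pop {r4, r5, pc}" (the shorter encoding); for an interrupt
   handler returning via PC the "^" suffix is appended to restore SPSR.  */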
16932 /* Output the assembly for a store multiple. */
16934 const char *
16935 vfp_output_fstmd (rtx * operands)
16937 char pattern[100];
16938 int p;
16939 int base;
16940 int i;
16942 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
16943 p = strlen (pattern);
16945 gcc_assert (REG_P (operands[1]));
16947 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
16948 for (i = 1; i < XVECLEN (operands[2], 0); i++)
16950 p += sprintf (&pattern[p], ", d%d", base + i);
16952 strcpy (&pattern[p], "}");
16954 output_asm_insn (pattern, operands);
16955 return "";
16959 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
16960 number of bytes pushed. */
16962 static int
16963 vfp_emit_fstmd (int base_reg, int count)
16965 rtx par;
16966 rtx dwarf;
16967 rtx tmp, reg;
16968 int i;
16970 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
16971 register pairs are stored by a store multiple insn. We avoid this
16972 by pushing an extra pair. */
16973 if (count == 2 && !arm_arch6)
16975 if (base_reg == LAST_VFP_REGNUM - 3)
16976 base_reg -= 2;
16977 count++;
16980 /* FSTMD may not store more than 16 doubleword registers at once. Split
16981 larger stores into multiple parts (up to a maximum of two, in
16982 practice). */
16983 if (count > 16)
16985 int saved;
16986 /* NOTE: base_reg is an internal register number, so each D register
16987 counts as 2. */
16988 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
16989 saved += vfp_emit_fstmd (base_reg, 16);
16990 return saved;
16993 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16994 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16996 reg = gen_rtx_REG (DFmode, base_reg);
16997 base_reg += 2;
16999 XVECEXP (par, 0, 0)
17000 = gen_rtx_SET (VOIDmode,
17001 gen_frame_mem
17002 (BLKmode,
17003 gen_rtx_PRE_MODIFY (Pmode,
17004 stack_pointer_rtx,
17005 plus_constant
17006 (Pmode, stack_pointer_rtx,
17007 - (count * 8)))
17009 gen_rtx_UNSPEC (BLKmode,
17010 gen_rtvec (1, reg),
17011 UNSPEC_PUSH_MULT));
17013 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17014 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17015 RTX_FRAME_RELATED_P (tmp) = 1;
17016 XVECEXP (dwarf, 0, 0) = tmp;
17018 tmp = gen_rtx_SET (VOIDmode,
17019 gen_frame_mem (DFmode, stack_pointer_rtx),
17020 reg);
17021 RTX_FRAME_RELATED_P (tmp) = 1;
17022 XVECEXP (dwarf, 0, 1) = tmp;
17024 for (i = 1; i < count; i++)
17026 reg = gen_rtx_REG (DFmode, base_reg);
17027 base_reg += 2;
17028 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17030 tmp = gen_rtx_SET (VOIDmode,
17031 gen_frame_mem (DFmode,
17032 plus_constant (Pmode,
17033 stack_pointer_rtx,
17034 i * 8)),
17035 reg);
17036 RTX_FRAME_RELATED_P (tmp) = 1;
17037 XVECEXP (dwarf, 0, i + 1) = tmp;
17040 par = emit_insn (par);
17041 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17042 RTX_FRAME_RELATED_P (par) = 1;
17044 return count * 8;
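/* Minimal usage sketch (arguments invented): vfp_emit_fstmd (base, 4)
   emits one push of four consecutive D registers starting at BASE,
   pre-decrements SP by 4 * 8 == 32 bytes, attaches the frame-related
   notes built above, and returns 32.  */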
17047 /* Emit a call instruction with pattern PAT. ADDR is the address of
17048 the call target. */
17050 void
17051 arm_emit_call_insn (rtx pat, rtx addr)
17053 rtx insn;
17055 insn = emit_call_insn (pat);
17057 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17058 If the call might use such an entry, add a use of the PIC register
17059 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17060 if (TARGET_VXWORKS_RTP
17061 && flag_pic
17062 && GET_CODE (addr) == SYMBOL_REF
17063 && (SYMBOL_REF_DECL (addr)
17064 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17065 : !SYMBOL_REF_LOCAL_P (addr)))
17067 require_pic_register ();
17068 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17072 /* Output a 'call' insn. */
17073 const char *
17074 output_call (rtx *operands)
17076 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17078 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17079 if (REGNO (operands[0]) == LR_REGNUM)
17081 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17082 output_asm_insn ("mov%?\t%0, %|lr", operands);
17085 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17087 if (TARGET_INTERWORK || arm_arch4t)
17088 output_asm_insn ("bx%?\t%0", operands);
17089 else
17090 output_asm_insn ("mov%?\t%|pc, %0", operands);
17092 return "";
17095 /* Output a 'call' insn that is a reference in memory. This is
17096 disabled for ARMv5 and we prefer a blx instead because otherwise
17097 there's a significant performance overhead. */
17098 const char *
17099 output_call_mem (rtx *operands)
17101 gcc_assert (!arm_arch5);
17102 if (TARGET_INTERWORK)
17104 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17105 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17106 output_asm_insn ("bx%?\t%|ip", operands);
17108 else if (regno_use_in (LR_REGNUM, operands[0]))
17110 /* LR is used in the memory address. We load the address in the
17111 first instruction. It's safe to use IP as the target of the
17112 load since the call will kill it anyway. */
17113 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17114 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17115 if (arm_arch4t)
17116 output_asm_insn ("bx%?\t%|ip", operands);
17117 else
17118 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17120 else
17122 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17123 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17126 return "";
17130 /* Output a move from arm registers to arm registers of a long double.
17131 OPERANDS[0] is the destination.
17132 OPERANDS[1] is the source. */
17133 const char *
17134 output_mov_long_double_arm_from_arm (rtx *operands)
17136 /* We have to be careful here because the two might overlap. */
17137 int dest_start = REGNO (operands[0]);
17138 int src_start = REGNO (operands[1]);
17139 rtx ops[2];
17140 int i;
17142 if (dest_start < src_start)
17144 for (i = 0; i < 3; i++)
17146 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17147 ops[1] = gen_rtx_REG (SImode, src_start + i);
17148 output_asm_insn ("mov%?\t%0, %1", ops);
17151 else
17153 for (i = 2; i >= 0; i--)
17155 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17156 ops[1] = gen_rtx_REG (SImode, src_start + i);
17157 output_asm_insn ("mov%?\t%0, %1", ops);
17161 return "";
17164 void
17165 arm_emit_movpair (rtx dest, rtx src)
17167 /* If the src is an immediate, simplify it. */
17168 if (CONST_INT_P (src))
17170 HOST_WIDE_INT val = INTVAL (src);
17171 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17172 if ((val >> 16) & 0x0000ffff)
17173 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17174 GEN_INT (16)),
17175 GEN_INT ((val >> 16) & 0x0000ffff));
17176 return;
17178 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17179 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
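/* For example, arm_emit_movpair (dest, GEN_INT (0x12345678)) sets the low
   half to 0x5678 and then inserts 0x1234 into the top 16 bits, which the
   movw/movt patterns can match on cores that have them; a symbolic SRC
   instead takes the HIGH/LO_SUM pair above.  */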
17182 /* Output a move between double words. It must be REG<-MEM
17183 or MEM<-REG. */
17184 const char *
17185 output_move_double (rtx *operands, bool emit, int *count)
17187 enum rtx_code code0 = GET_CODE (operands[0]);
17188 enum rtx_code code1 = GET_CODE (operands[1]);
17189 rtx otherops[3];
17190 if (count)
17191 *count = 1;
17193 /* The only case when this might happen is when
17194 you are looking at the length of a DImode instruction
17195 that has an invalid constant in it. */
17196 if (code0 == REG && code1 != MEM)
17198 gcc_assert (!emit);
17199 *count = 2;
17200 return "";
17203 if (code0 == REG)
17205 unsigned int reg0 = REGNO (operands[0]);
17207 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17209 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17211 switch (GET_CODE (XEXP (operands[1], 0)))
17213 case REG:
17215 if (emit)
17217 if (TARGET_LDRD
17218 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17219 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17220 else
17221 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17223 break;
17225 case PRE_INC:
17226 gcc_assert (TARGET_LDRD);
17227 if (emit)
17228 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17229 break;
17231 case PRE_DEC:
17232 if (emit)
17234 if (TARGET_LDRD)
17235 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17236 else
17237 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17239 break;
17241 case POST_INC:
17242 if (emit)
17244 if (TARGET_LDRD)
17245 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17246 else
17247 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17249 break;
17251 case POST_DEC:
17252 gcc_assert (TARGET_LDRD);
17253 if (emit)
17254 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17255 break;
17257 case PRE_MODIFY:
17258 case POST_MODIFY:
17259 /* Autoincrement addressing modes should never have overlapping
17260 base and destination registers, and overlapping index registers
17261 are already prohibited, so this doesn't need to worry about
17262 fix_cm3_ldrd. */
17263 otherops[0] = operands[0];
17264 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17265 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17267 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17269 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17271 /* Registers overlap so split out the increment. */
17272 if (emit)
17274 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17275 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17277 if (count)
17278 *count = 2;
17280 else
17282 /* Use a single insn if we can.
17283 FIXME: IWMMXT allows offsets larger than ldrd can
17284 handle; fix these up with a pair of ldr. */
17285 if (TARGET_THUMB2
17286 || !CONST_INT_P (otherops[2])
17287 || (INTVAL (otherops[2]) > -256
17288 && INTVAL (otherops[2]) < 256))
17290 if (emit)
17291 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17293 else
17295 if (emit)
17297 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17298 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17300 if (count)
17301 *count = 2;
17306 else
17308 /* Use a single insn if we can.
17309 FIXME: IWMMXT allows offsets larger than ldrd can handle;
17310 fix these up with a pair of ldr. */
17311 if (TARGET_THUMB2
17312 || !CONST_INT_P (otherops[2])
17313 || (INTVAL (otherops[2]) > -256
17314 && INTVAL (otherops[2]) < 256))
17316 if (emit)
17317 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17319 else
17321 if (emit)
17323 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17324 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17326 if (count)
17327 *count = 2;
17330 break;
17332 case LABEL_REF:
17333 case CONST:
17334 /* We might be able to use ldrd %0, %1 here. However, the range is
17335 different to ldr/adr, and it is broken on some ARMv7-M
17336 implementations. */
17337 /* Use the second register of the pair to avoid problematic
17338 overlap. */
17339 otherops[1] = operands[1];
17340 if (emit)
17341 output_asm_insn ("adr%?\t%0, %1", otherops);
17342 operands[1] = otherops[0];
17343 if (emit)
17345 if (TARGET_LDRD)
17346 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17347 else
17348 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17351 if (count)
17352 *count = 2;
17353 break;
17355 /* ??? This needs checking for thumb2. */
17356 default:
17357 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17358 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17360 otherops[0] = operands[0];
17361 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17362 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17364 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17366 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17368 switch ((int) INTVAL (otherops[2]))
17370 case -8:
17371 if (emit)
17372 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17373 return "";
17374 case -4:
17375 if (TARGET_THUMB2)
17376 break;
17377 if (emit)
17378 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17379 return "";
17380 case 4:
17381 if (TARGET_THUMB2)
17382 break;
17383 if (emit)
17384 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17385 return "";
17388 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17389 operands[1] = otherops[0];
17390 if (TARGET_LDRD
17391 && (REG_P (otherops[2])
17392 || TARGET_THUMB2
17393 || (CONST_INT_P (otherops[2])
17394 && INTVAL (otherops[2]) > -256
17395 && INTVAL (otherops[2]) < 256)))
17397 if (reg_overlap_mentioned_p (operands[0],
17398 otherops[2]))
17400 rtx tmp;
17401 /* Swap base and index registers over to
17402 avoid a conflict. */
17403 tmp = otherops[1];
17404 otherops[1] = otherops[2];
17405 otherops[2] = tmp;
17407 /* If both registers conflict, it will usually
17408 have been fixed by a splitter. */
17409 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17410 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17412 if (emit)
17414 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17415 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17417 if (count)
17418 *count = 2;
17420 else
17422 otherops[0] = operands[0];
17423 if (emit)
17424 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17426 return "";
17429 if (CONST_INT_P (otherops[2]))
17431 if (emit)
17433 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17434 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17435 else
17436 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17439 else
17441 if (emit)
17442 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17445 else
17447 if (emit)
17448 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17451 if (count)
17452 *count = 2;
17454 if (TARGET_LDRD)
17455 return "ldr%(d%)\t%0, [%1]";
17457 return "ldm%(ia%)\t%1, %M0";
17459 else
17461 otherops[1] = adjust_address (operands[1], SImode, 4);
17462 /* Take care of overlapping base/data reg. */
17463 if (reg_mentioned_p (operands[0], operands[1]))
17465 if (emit)
17467 output_asm_insn ("ldr%?\t%0, %1", otherops);
17468 output_asm_insn ("ldr%?\t%0, %1", operands);
17470 if (count)
17471 *count = 2;
17474 else
17476 if (emit)
17478 output_asm_insn ("ldr%?\t%0, %1", operands);
17479 output_asm_insn ("ldr%?\t%0, %1", otherops);
17481 if (count)
17482 *count = 2;
17487 else
17489 /* Constraints should ensure this. */
17490 gcc_assert (code0 == MEM && code1 == REG);
17491 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17492 || (TARGET_ARM && TARGET_LDRD));
17494 switch (GET_CODE (XEXP (operands[0], 0)))
17496 case REG:
17497 if (emit)
17499 if (TARGET_LDRD)
17500 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17501 else
17502 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17504 break;
17506 case PRE_INC:
17507 gcc_assert (TARGET_LDRD);
17508 if (emit)
17509 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17510 break;
17512 case PRE_DEC:
17513 if (emit)
17515 if (TARGET_LDRD)
17516 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17517 else
17518 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17520 break;
17522 case POST_INC:
17523 if (emit)
17525 if (TARGET_LDRD)
17526 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17527 else
17528 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17530 break;
17532 case POST_DEC:
17533 gcc_assert (TARGET_LDRD);
17534 if (emit)
17535 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17536 break;
17538 case PRE_MODIFY:
17539 case POST_MODIFY:
17540 otherops[0] = operands[1];
17541 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17542 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17544 /* IWMMXT allows offsets larger than strd can handle,
17545 fix these up with a pair of str. */
17546 if (!TARGET_THUMB2
17547 && CONST_INT_P (otherops[2])
17548 && (INTVAL(otherops[2]) <= -256
17549 || INTVAL(otherops[2]) >= 256))
17551 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17553 if (emit)
17555 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17556 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17558 if (count)
17559 *count = 2;
17561 else
17563 if (emit)
17565 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17566 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17568 if (count)
17569 *count = 2;
17572 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17574 if (emit)
17575 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17577 else
17579 if (emit)
17580 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17582 break;
17584 case PLUS:
17585 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17586 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17588 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17590 case -8:
17591 if (emit)
17592 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17593 return "";
17595 case -4:
17596 if (TARGET_THUMB2)
17597 break;
17598 if (emit)
17599 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17600 return "";
17602 case 4:
17603 if (TARGET_THUMB2)
17604 break;
17605 if (emit)
17606 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17607 return "";
17610 if (TARGET_LDRD
17611 && (REG_P (otherops[2])
17612 || TARGET_THUMB2
17613 || (CONST_INT_P (otherops[2])
17614 && INTVAL (otherops[2]) > -256
17615 && INTVAL (otherops[2]) < 256)))
17617 otherops[0] = operands[1];
17618 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17619 if (emit)
17620 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17621 return "";
17623 /* Fall through */
17625 default:
17626 otherops[0] = adjust_address (operands[0], SImode, 4);
17627 otherops[1] = operands[1];
17628 if (emit)
17630 output_asm_insn ("str%?\t%1, %0", operands);
17631 output_asm_insn ("str%?\t%H1, %0", otherops);
17633 if (count)
17634 *count = 2;
17638 return "";
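/* Editorial note (hedged example, not part of the original source): in the
   PLUS cases above, when LDRD cannot be used the small constant offsets are
   folded into load/store-multiple addressing modes.  For instance, a DImode
   load of the value at [r1, #-8] into r4/r5 is expected to come out roughly
   as

	ldmdb	r1, {r4, r5}

   because LDMDB reads from r1 - 8 and r1 - 4; the %M operand modifier is
   assumed here to print the destination register pair, and the store path
   uses the matching STMDB/STMDA/STMIB forms.  */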
17641 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17642 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17644 const char *
17645 output_move_quad (rtx *operands)
17647 if (REG_P (operands[0]))
17649 /* Load, or reg->reg move. */
17651 if (MEM_P (operands[1]))
17653 switch (GET_CODE (XEXP (operands[1], 0)))
17655 case REG:
17656 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17657 break;
17659 case LABEL_REF:
17660 case CONST:
17661 output_asm_insn ("adr%?\t%0, %1", operands);
17662 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17663 break;
17665 default:
17666 gcc_unreachable ();
17669 else
17671 rtx ops[2];
17672 int dest, src, i;
17674 gcc_assert (REG_P (operands[1]));
17676 dest = REGNO (operands[0]);
17677 src = REGNO (operands[1]);
17679 /* This seems pretty dumb, but hopefully GCC won't try to do it
17680 very often. */
17681 if (dest < src)
17682 for (i = 0; i < 4; i++)
17684 ops[0] = gen_rtx_REG (SImode, dest + i);
17685 ops[1] = gen_rtx_REG (SImode, src + i);
17686 output_asm_insn ("mov%?\t%0, %1", ops);
17688 else
17689 for (i = 3; i >= 0; i--)
17691 ops[0] = gen_rtx_REG (SImode, dest + i);
17692 ops[1] = gen_rtx_REG (SImode, src + i);
17693 output_asm_insn ("mov%?\t%0, %1", ops);
17697 else
17699 gcc_assert (MEM_P (operands[0]));
17700 gcc_assert (REG_P (operands[1]));
17701 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17703 switch (GET_CODE (XEXP (operands[0], 0)))
17705 case REG:
17706 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17707 break;
17709 default:
17710 gcc_unreachable ();
17714 return "";
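/* Editorial sketch (not GCC code): the forward/backward copy loops above are
   the register-file analogue of an overlap-safe memmove.  When the
   destination range starts below the source range the copy must run upwards,
   otherwise downwards, so no source register is overwritten before it has
   been read.  */
static void
move_overlapping_range (int *regfile, int dest, int src, int n)
{
  int i;
  if (dest < src)
    for (i = 0; i < n; i++)           /* copy from low to high */
      regfile[dest + i] = regfile[src + i];
  else
    for (i = n - 1; i >= 0; i--)      /* copy from high to low */
      regfile[dest + i] = regfile[src + i];
}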
17717 /* Output a VFP load or store instruction. */
17719 const char *
17720 output_move_vfp (rtx *operands)
17722 rtx reg, mem, addr, ops[2];
17723 int load = REG_P (operands[0]);
17724 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
17725 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
17726 const char *templ;
17727 char buff[50];
17728 enum machine_mode mode;
17730 reg = operands[!load];
17731 mem = operands[load];
17733 mode = GET_MODE (reg);
17735 gcc_assert (REG_P (reg));
17736 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
17737 gcc_assert (mode == SFmode
17738 || mode == DFmode
17739 || mode == SImode
17740 || mode == DImode
17741 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
17742 gcc_assert (MEM_P (mem));
17744 addr = XEXP (mem, 0);
17746 switch (GET_CODE (addr))
17748 case PRE_DEC:
17749 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
17750 ops[0] = XEXP (addr, 0);
17751 ops[1] = reg;
17752 break;
17754 case POST_INC:
17755 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
17756 ops[0] = XEXP (addr, 0);
17757 ops[1] = reg;
17758 break;
17760 default:
17761 templ = "f%s%c%%?\t%%%s0, %%1%s";
17762 ops[0] = reg;
17763 ops[1] = mem;
17764 break;
17767 sprintf (buff, templ,
17768 load ? "ld" : "st",
17769 dp ? 'd' : 's',
17770 dp ? "P" : "",
17771 integer_p ? "\t%@ int" : "");
17772 output_asm_insn (buff, ops);
17774 return "";
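/* Editorial sketch (hypothetical, not part of GCC): how the sprintf template
   above expands in the default offset-addressed case for a double-precision
   load; the arguments mirror those passed by output_move_vfp.  */
#include <stdio.h>
int
main (void)
{
  char buff[50];
  sprintf (buff, "f%s%c%%?\t%%%s0, %%1%s", "ld", 'd', "P", "");
  puts (buff);   /* prints "fldd%?\t%P0, %1"; the %-codes are expanded later */
  return 0;
}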
17777 /* Output a Neon double-word or quad-word load or store, or a load
17778 or store for larger structure modes.
17780 WARNING: The ordering of elements is weird in big-endian mode,
17781 because the EABI requires that vectors stored in memory appear
17782 as though they were stored by a VSTM instruction.
17783 GCC RTL defines element ordering based on in-memory order.
17784 This can be different from the architectural ordering of elements
17785 within a NEON register. The intrinsics defined in arm_neon.h use the
17786 NEON register element ordering, not the GCC RTL element ordering.
17788 For example, the in-memory ordering of a big-endian quadword
17789 vector with 16-bit elements when stored from register pair {d0,d1}
17790 will be (lowest address first, d0[N] is NEON register element N):
17792 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17794 When necessary, quadword registers (dN, dN+1) are moved to ARM
17795 registers from rN in the order:
17797 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17799 This is so that STM/LDM can be used on vectors in ARM registers, and the
17800 same memory layout will result as if VSTM/VLDM were used.
17802 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17803 possible, which allows use of appropriate alignment tags.
17804 Note that the choice of "64" is independent of the actual vector
17805 element size; this size simply ensures that the behavior is
17806 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17808 Due to limitations of those instructions, use of VST1.64/VLD1.64
17809 is not possible if:
17810 - the address contains PRE_DEC, or
17811 - the mode refers to more than 4 double-word registers
17813 In those cases, it would be possible to replace VSTM/VLDM by a
17814 sequence of instructions; this is not currently implemented since
17815 this is not certain to actually improve performance. */
17817 const char *
17818 output_move_neon (rtx *operands)
17820 rtx reg, mem, addr, ops[2];
17821 int regno, nregs, load = REG_P (operands[0]);
17822 const char *templ;
17823 char buff[50];
17824 enum machine_mode mode;
17826 reg = operands[!load];
17827 mem = operands[load];
17829 mode = GET_MODE (reg);
17831 gcc_assert (REG_P (reg));
17832 regno = REGNO (reg);
17833 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
17834 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
17835 || NEON_REGNO_OK_FOR_QUAD (regno));
17836 gcc_assert (VALID_NEON_DREG_MODE (mode)
17837 || VALID_NEON_QREG_MODE (mode)
17838 || VALID_NEON_STRUCT_MODE (mode));
17839 gcc_assert (MEM_P (mem));
17841 addr = XEXP (mem, 0);
17843 /* Strip off const from addresses like (const (plus (...))). */
17844 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17845 addr = XEXP (addr, 0);
17847 switch (GET_CODE (addr))
17849 case POST_INC:
17850 /* We have to use vldm / vstm for too-large modes. */
17851 if (nregs > 4)
17853 templ = "v%smia%%?\t%%0!, %%h1";
17854 ops[0] = XEXP (addr, 0);
17856 else
17858 templ = "v%s1.64\t%%h1, %%A0";
17859 ops[0] = mem;
17861 ops[1] = reg;
17862 break;
17864 case PRE_DEC:
17865 /* We have to use vldm / vstm in this case, since there is no
17866 pre-decrement form of the vld1 / vst1 instructions. */
17867 templ = "v%smdb%%?\t%%0!, %%h1";
17868 ops[0] = XEXP (addr, 0);
17869 ops[1] = reg;
17870 break;
17872 case POST_MODIFY:
17873 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17874 gcc_unreachable ();
17876 case LABEL_REF:
17877 case PLUS:
17879 int i;
17880 int overlap = -1;
17881 for (i = 0; i < nregs; i++)
17883 /* We're only using DImode here because it's a convenient size. */
17884 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
17885 ops[1] = adjust_address (mem, DImode, 8 * i);
17886 if (reg_overlap_mentioned_p (ops[0], mem))
17888 gcc_assert (overlap == -1);
17889 overlap = i;
17891 else
17893 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17894 output_asm_insn (buff, ops);
17897 if (overlap != -1)
17899 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
17900 ops[1] = adjust_address (mem, SImode, 8 * overlap);
17901 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
17902 output_asm_insn (buff, ops);
17905 return "";
17908 default:
17909 /* We have to use vldm / vstm for too-large modes. */
17910 if (nregs > 4)
17911 templ = "v%smia%%?\t%%m0, %%h1";
17912 else
17913 templ = "v%s1.64\t%%h1, %%A0";
17915 ops[0] = mem;
17916 ops[1] = reg;
17919 sprintf (buff, templ, load ? "ld" : "st");
17920 output_asm_insn (buff, ops);
17922 return "";
17925 /* Compute and return the length of neon_mov<mode>, where <mode> is
17926 one of VSTRUCT modes: EI, OI, CI or XI. */
17927 int
17928 arm_attr_length_move_neon (rtx insn)
17930 rtx reg, mem, addr;
17931 int load;
17932 enum machine_mode mode;
17934 extract_insn_cached (insn);
17936 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
17938 mode = GET_MODE (recog_data.operand[0]);
17939 switch (mode)
17941 case EImode:
17942 case OImode:
17943 return 8;
17944 case CImode:
17945 return 12;
17946 case XImode:
17947 return 16;
17948 default:
17949 gcc_unreachable ();
17953 load = REG_P (recog_data.operand[0]);
17954 reg = recog_data.operand[!load];
17955 mem = recog_data.operand[load];
17957 gcc_assert (MEM_P (mem));
17959 mode = GET_MODE (reg);
17960 addr = XEXP (mem, 0);
17962 /* Strip off const from addresses like (const (plus (...))). */
17963 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
17964 addr = XEXP (addr, 0);
17966 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
17968 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
17969 return insns * 4;
17971 else
17972 return 4;
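/* Editorial sketch (not GCC code) restating the length rule above.  NREGS_D
   is assumed to be the number of D-registers covered by the mode (3 for
   EImode, 4 for OImode, 6 for CImode, 8 for XImode); the reg-to-reg formula
   simply reproduces the 8/8/12/16 values of the switch statement.  */
static int
neon_move_length_sketch (int nregs_d, int reg_to_reg, int addr_is_plus_or_label)
{
  if (reg_to_reg)
    return 4 * ((nregs_d + 1) / 2);   /* one move insn per D-register pair */
  if (addr_is_plus_or_label)
    return 4 * nregs_d;               /* one VLDR/VSTR per D-register */
  return 4;                           /* a single multi-register load/store */
}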
17975 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17976 return zero. */
17978 int
17979 arm_address_offset_is_imm (rtx insn)
17981 rtx mem, addr;
17983 extract_insn_cached (insn);
17985 if (REG_P (recog_data.operand[0]))
17986 return 0;
17988 mem = recog_data.operand[0];
17990 gcc_assert (MEM_P (mem));
17992 addr = XEXP (mem, 0);
17994 if (REG_P (addr)
17995 || (GET_CODE (addr) == PLUS
17996 && REG_P (XEXP (addr, 0))
17997 && CONST_INT_P (XEXP (addr, 1))))
17998 return 1;
17999 else
18000 return 0;
18003 /* Output an ADD r, s, #n where n may be too big for one instruction.
18004 If adding zero to one register, output nothing. */
18005 const char *
18006 output_add_immediate (rtx *operands)
18008 HOST_WIDE_INT n = INTVAL (operands[2]);
18010 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18012 if (n < 0)
18013 output_multi_immediate (operands,
18014 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18015 -n);
18016 else
18017 output_multi_immediate (operands,
18018 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18022 return "";
18025 /* Output a multiple immediate operation.
18026 OPERANDS is the vector of operands referred to in the output patterns.
18027 INSTR1 is the output pattern to use for the first constant.
18028 INSTR2 is the output pattern to use for subsequent constants.
18029 IMMED_OP is the index of the constant slot in OPERANDS.
18030 N is the constant value. */
18031 static const char *
18032 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18033 int immed_op, HOST_WIDE_INT n)
18035 #if HOST_BITS_PER_WIDE_INT > 32
18036 n &= 0xffffffff;
18037 #endif
18039 if (n == 0)
18041 /* Quick and easy output. */
18042 operands[immed_op] = const0_rtx;
18043 output_asm_insn (instr1, operands);
18045 else
18047 int i;
18048 const char * instr = instr1;
18050 /* Note that n is never zero here (which would give no output). */
18051 for (i = 0; i < 32; i += 2)
18053 if (n & (3 << i))
18055 operands[immed_op] = GEN_INT (n & (255 << i));
18056 output_asm_insn (instr, operands);
18057 instr = instr2;
18058 i += 6;
18063 return "";
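/* Editorial sketch (standalone, not GCC code) of the splitting loop above:
   the constant is consumed eight bits at a time, each chunk aligned on an
   even bit position, so every chunk can be encoded as an ARM rotated 8-bit
   immediate and applied by one add/sub instruction.  */
#include <stdio.h>
int
main (void)
{
  unsigned int n = 0x12340;                    /* example constant */
  int i;
  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
        printf ("#0x%x\n", n & (255u << i));   /* prints 0x2340, then 0x10000 */
        i += 6;                                /* skip past the chunk just taken */
      }
  return 0;
}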
18066 /* Return the name of a shifter operation. */
18067 static const char *
18068 arm_shift_nmem(enum rtx_code code)
18070 switch (code)
18072 case ASHIFT:
18073 return ARM_LSL_NAME;
18075 case ASHIFTRT:
18076 return "asr";
18078 case LSHIFTRT:
18079 return "lsr";
18081 case ROTATERT:
18082 return "ror";
18084 default:
18085 abort();
18089 /* Return the appropriate ARM instruction for the operation code.
18090 The returned result should not be overwritten. OP is the rtx of the
18091 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18092 was shifted. */
18093 const char *
18094 arithmetic_instr (rtx op, int shift_first_arg)
18096 switch (GET_CODE (op))
18098 case PLUS:
18099 return "add";
18101 case MINUS:
18102 return shift_first_arg ? "rsb" : "sub";
18104 case IOR:
18105 return "orr";
18107 case XOR:
18108 return "eor";
18110 case AND:
18111 return "and";
18113 case ASHIFT:
18114 case ASHIFTRT:
18115 case LSHIFTRT:
18116 case ROTATERT:
18117 return arm_shift_nmem(GET_CODE(op));
18119 default:
18120 gcc_unreachable ();
18124 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18125 for the operation code. The returned result should not be overwritten.
18126 OP is the rtx code of the shift.
18127 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18128 will hold the constant shift amount. */
18129 static const char *
18130 shift_op (rtx op, HOST_WIDE_INT *amountp)
18132 const char * mnem;
18133 enum rtx_code code = GET_CODE (op);
18135 switch (code)
18137 case ROTATE:
18138 if (!CONST_INT_P (XEXP (op, 1)))
18140 output_operand_lossage ("invalid shift operand");
18141 return NULL;
18144 code = ROTATERT;
18145 *amountp = 32 - INTVAL (XEXP (op, 1));
18146 mnem = "ror";
18147 break;
18149 case ASHIFT:
18150 case ASHIFTRT:
18151 case LSHIFTRT:
18152 case ROTATERT:
18153 mnem = arm_shift_nmem(code);
18154 if (CONST_INT_P (XEXP (op, 1)))
18156 *amountp = INTVAL (XEXP (op, 1));
18158 else if (REG_P (XEXP (op, 1)))
18160 *amountp = -1;
18161 return mnem;
18163 else
18165 output_operand_lossage ("invalid shift operand");
18166 return NULL;
18168 break;
18170 case MULT:
18171 /* We never have to worry about the amount being other than a
18172 power of 2, since this case can never be reloaded from a reg. */
18173 if (!CONST_INT_P (XEXP (op, 1)))
18175 output_operand_lossage ("invalid shift operand");
18176 return NULL;
18179 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18181 /* Amount must be a power of two. */
18182 if (*amountp & (*amountp - 1))
18184 output_operand_lossage ("invalid shift operand");
18185 return NULL;
18188 *amountp = int_log2 (*amountp);
18189 return ARM_LSL_NAME;
18191 default:
18192 output_operand_lossage ("invalid shift operand");
18193 return NULL;
18196 /* This is not 100% correct, but follows from the desire to merge
18197 multiplication by a power of 2 with the recognizer for a
18198 shift. >=32 is not a valid shift for "lsl", so we must try and
18199 output a shift that produces the correct arithmetical result.
18200 Using lsr #32 is identical except for the fact that the carry bit
18201 is not set correctly if we set the flags; but we never use the
18202 carry bit from such an operation, so we can ignore that. */
18203 if (code == ROTATERT)
18204 /* Rotate is just modulo 32. */
18205 *amountp &= 31;
18206 else if (*amountp != (*amountp & 31))
18208 if (code == ASHIFT)
18209 mnem = "lsr";
18210 *amountp = 32;
18213 /* Shifts of 0 are no-ops. */
18214 if (*amountp == 0)
18215 return NULL;
18217 return mnem;
18220 /* Obtain the shift from the POWER of two. */
18222 static HOST_WIDE_INT
18223 int_log2 (HOST_WIDE_INT power)
18225 HOST_WIDE_INT shift = 0;
18227 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18229 gcc_assert (shift <= 31);
18230 shift++;
18233 return shift;
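/* Editorial worked example (not part of the original source): int_log2 (8)
   returns 3 and int_log2 (0x10000) returns 16, so shift_op above prints
   (mult x 8) using the "lsl" mnemonic with *AMOUNTP == 3, and a
   multiplication by 0x10000 as "lsl" with *AMOUNTP == 16.  */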
18236 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18237 because /bin/as is horribly restrictive. The judgement about
18238 whether or not each character is 'printable' (and can be output as
18239 is) or not (and must be printed with an octal escape) must be made
18240 with reference to the *host* character set -- the situation is
18241 similar to that discussed in the comments above pp_c_char in
18242 c-pretty-print.c. */
18244 #define MAX_ASCII_LEN 51
18246 void
18247 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18249 int i;
18250 int len_so_far = 0;
18252 fputs ("\t.ascii\t\"", stream);
18254 for (i = 0; i < len; i++)
18256 int c = p[i];
18258 if (len_so_far >= MAX_ASCII_LEN)
18260 fputs ("\"\n\t.ascii\t\"", stream);
18261 len_so_far = 0;
18264 if (ISPRINT (c))
18266 if (c == '\\' || c == '\"')
18268 putc ('\\', stream);
18269 len_so_far++;
18271 putc (c, stream);
18272 len_so_far++;
18274 else
18276 fprintf (stream, "\\%03o", c);
18277 len_so_far += 4;
18281 fputs ("\"\n", stream);
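/* Editorial usage sketch (hypothetical call, not part of the original
   source): passing the 11-character string "Hi \"there\"\n" to
   output_ascii_pseudo_op would emit

	.ascii "Hi \"there\"\012"

   printable characters are copied (with '"' and '\\' escaped by a
   backslash), the non-printable newline becomes a three-digit octal escape,
   and a fresh .ascii directive is started once MAX_ASCII_LEN characters
   have been written on the current line.  */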
18284 /* Compute the register save mask for registers 0 through 12
18285 inclusive. This code is used by arm_compute_save_reg_mask. */
18287 static unsigned long
18288 arm_compute_save_reg0_reg12_mask (void)
18290 unsigned long func_type = arm_current_func_type ();
18291 unsigned long save_reg_mask = 0;
18292 unsigned int reg;
18294 if (IS_INTERRUPT (func_type))
18296 unsigned int max_reg;
18297 /* Interrupt functions must not corrupt any registers,
18298 even call clobbered ones. If this is a leaf function
18299 we can just examine the registers used by the RTL, but
18300 otherwise we have to assume that whatever function is
18301 called might clobber anything, and so we have to save
18302 all the call-clobbered registers as well. */
18303 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18304 /* FIQ handlers have registers r8 - r12 banked, so
18305 we only need to check r0 - r7. Normal ISRs only
18306 bank r14 and r15, so we must check up to r12.
18307 r13 is the stack pointer which is always preserved,
18308 so we do not need to consider it here. */
18309 max_reg = 7;
18310 else
18311 max_reg = 12;
18313 for (reg = 0; reg <= max_reg; reg++)
18314 if (df_regs_ever_live_p (reg)
18315 || (! crtl->is_leaf && call_used_regs[reg]))
18316 save_reg_mask |= (1 << reg);
18318 /* Also save the pic base register if necessary. */
18319 if (flag_pic
18320 && !TARGET_SINGLE_PIC_BASE
18321 && arm_pic_register != INVALID_REGNUM
18322 && crtl->uses_pic_offset_table)
18323 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18325 else if (IS_VOLATILE(func_type))
18327 /* For noreturn functions we historically omitted register saves
18328 altogether. However this really messes up debugging. As a
18329 compromise save just the frame pointers. Combined with the link
18330 register saved elsewhere this should be sufficient to get
18331 a backtrace. */
18332 if (frame_pointer_needed)
18333 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18334 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18335 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18336 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18337 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18339 else
18341 /* In the normal case we only need to save those registers
18342 which are call saved and which are used by this function. */
18343 for (reg = 0; reg <= 11; reg++)
18344 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18345 save_reg_mask |= (1 << reg);
18347 /* Handle the frame pointer as a special case. */
18348 if (frame_pointer_needed)
18349 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18351 /* If we aren't loading the PIC register,
18352 don't stack it even though it may be live. */
18353 if (flag_pic
18354 && !TARGET_SINGLE_PIC_BASE
18355 && arm_pic_register != INVALID_REGNUM
18356 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18357 || crtl->uses_pic_offset_table))
18358 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18360 /* The prologue will copy SP into R0, so save it. */
18361 if (IS_STACKALIGN (func_type))
18362 save_reg_mask |= 1;
18365 /* Save registers so the exception handler can modify them. */
18366 if (crtl->calls_eh_return)
18368 unsigned int i;
18370 for (i = 0; ; i++)
18372 reg = EH_RETURN_DATA_REGNO (i);
18373 if (reg == INVALID_REGNUM)
18374 break;
18375 save_reg_mask |= 1 << reg;
18379 return save_reg_mask;
18382 /* Return true if r3 is live at the start of the function. */
18384 static bool
18385 arm_r3_live_at_start_p (void)
18387 /* Just look at cfg info, which is still close enough to correct at this
18388 point. This gives false positives for broken functions that might use
18389 uninitialized data that happens to be allocated in r3, but who cares? */
18390 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
18391 3);
18394 /* Compute the number of bytes used to store the static chain register on the
18395 stack, above the stack frame. We need to know this accurately to get the
18396 alignment of the rest of the stack frame correct. */
18398 static int
18399 arm_compute_static_chain_stack_bytes (void)
18401 /* See the defining assertion in arm_expand_prologue. */
18402 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18403 && IS_NESTED (arm_current_func_type ())
18404 && arm_r3_live_at_start_p ()
18405 && crtl->args.pretend_args_size == 0)
18406 return 4;
18408 return 0;
18411 /* Compute a bit mask of which registers need to be
18412 saved on the stack for the current function.
18413 This is used by arm_get_frame_offsets, which may add extra registers. */
18415 static unsigned long
18416 arm_compute_save_reg_mask (void)
18418 unsigned int save_reg_mask = 0;
18419 unsigned long func_type = arm_current_func_type ();
18420 unsigned int reg;
18422 if (IS_NAKED (func_type))
18423 /* This should never really happen. */
18424 return 0;
18426 /* If we are creating a stack frame, then we must save the frame pointer,
18427 IP (which will hold the old stack pointer), LR and the PC. */
18428 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18429 save_reg_mask |=
18430 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18431 | (1 << IP_REGNUM)
18432 | (1 << LR_REGNUM)
18433 | (1 << PC_REGNUM);
18435 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18437 /* Decide if we need to save the link register.
18438 Interrupt routines have their own banked link register,
18439 so they never need to save it.
18440 Otherwise if we do not use the link register we do not need to save
18441 it. If we are pushing other registers onto the stack however, we
18442 can save an instruction in the epilogue by pushing the link register
18443 now and then popping it back into the PC. This incurs extra memory
18444 accesses though, so we only do it when optimizing for size, and only
18445 if we know that we will not need a fancy return sequence. */
18446 if (df_regs_ever_live_p (LR_REGNUM)
18447 || (save_reg_mask
18448 && optimize_size
18449 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18450 && !crtl->calls_eh_return))
18451 save_reg_mask |= 1 << LR_REGNUM;
18453 if (cfun->machine->lr_save_eliminated)
18454 save_reg_mask &= ~ (1 << LR_REGNUM);
18456 if (TARGET_REALLY_IWMMXT
18457 && ((bit_count (save_reg_mask)
18458 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18459 arm_compute_static_chain_stack_bytes())
18460 ) % 2) != 0)
18462 /* The total number of registers that are going to be pushed
18463 onto the stack is odd. We need to ensure that the stack
18464 is 64-bit aligned before we start to save iWMMXt registers,
18465 and also before we start to create locals. (A local variable
18466 might be a double or long long which we will load/store using
18467 an iWMMXt instruction). Therefore we need to push another
18468 ARM register, so that the stack will be 64-bit aligned. We
18469 try to avoid using the arg registers (r0 - r3) as they might be
18470 used to pass values in a tail call. */
18471 for (reg = 4; reg <= 12; reg++)
18472 if ((save_reg_mask & (1 << reg)) == 0)
18473 break;
18475 if (reg <= 12)
18476 save_reg_mask |= (1 << reg);
18477 else
18479 cfun->machine->sibcall_blocked = 1;
18480 save_reg_mask |= (1 << 3);
18484 /* We may need to push an additional register for use initializing the
18485 PIC base register. */
18486 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18487 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18489 reg = thumb_find_work_register (1 << 4);
18490 if (!call_used_regs[reg])
18491 save_reg_mask |= (1 << reg);
18494 return save_reg_mask;
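/* Editorial sketch (not GCC code) of the iWMMXt alignment rule above,
   assuming ARM_NUM_INTS rounds a byte count up to whole words: if an odd
   number of 4-byte words would be pushed, one extra core register is added
   so the total push remains a multiple of 8 bytes.  For example, saving
   {r4, r5, r6} with no pretend args is 3 words (12 bytes), so r7 is pushed
   as well, giving 16 bytes.  */
static int
push_needs_padding_reg (unsigned long reg_mask, int pretend_bytes)
{
  int words = __builtin_popcountl (reg_mask) + (pretend_bytes + 3) / 4;
  return (words & 1) != 0;
}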
18498 /* Compute a bit mask of which registers need to be
18499 saved on the stack for the current function. */
18500 static unsigned long
18501 thumb1_compute_save_reg_mask (void)
18503 unsigned long mask;
18504 unsigned reg;
18506 mask = 0;
18507 for (reg = 0; reg < 12; reg ++)
18508 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18509 mask |= 1 << reg;
18511 if (flag_pic
18512 && !TARGET_SINGLE_PIC_BASE
18513 && arm_pic_register != INVALID_REGNUM
18514 && crtl->uses_pic_offset_table)
18515 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18517 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18518 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18519 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18521 /* LR will also be pushed if any lo regs are pushed. */
18522 if (mask & 0xff || thumb_force_lr_save ())
18523 mask |= (1 << LR_REGNUM);
18525 /* Make sure we have a low work register if we need one.
18526 We will need one if we are going to push a high register,
18527 but we are not currently intending to push a low register. */
18528 if ((mask & 0xff) == 0
18529 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18531 /* Use thumb_find_work_register to choose which register
18532 we will use. If the register is live then we will
18533 have to push it. Use LAST_LO_REGNUM as our fallback
18534 choice for the register to select. */
18535 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18536 /* Make sure the register returned by thumb_find_work_register is
18537 not part of the return value. */
18538 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18539 reg = LAST_LO_REGNUM;
18541 if (! call_used_regs[reg])
18542 mask |= 1 << reg;
18545 /* The 504 below is 8 bytes less than 512 because there are two possible
18546 alignment words. We can't tell here if they will be present or not so we
18547 have to play it safe and assume that they are. */
18548 if ((CALLER_INTERWORKING_SLOT_SIZE +
18549 ROUND_UP_WORD (get_frame_size ()) +
18550 crtl->outgoing_args_size) >= 504)
18552 /* This is the same as the code in thumb1_expand_prologue() which
18553 determines which register to use for stack decrement. */
18554 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18555 if (mask & (1 << reg))
18556 break;
18558 if (reg > LAST_LO_REGNUM)
18560 /* Make sure we have a register available for stack decrement. */
18561 mask |= 1 << LAST_LO_REGNUM;
18565 return mask;
18569 /* Return the number of bytes required to save VFP registers. */
18570 static int
18571 arm_get_vfp_saved_size (void)
18573 unsigned int regno;
18574 int count;
18575 int saved;
18577 saved = 0;
18578 /* Space for saved VFP registers. */
18579 if (TARGET_HARD_FLOAT && TARGET_VFP)
18581 count = 0;
18582 for (regno = FIRST_VFP_REGNUM;
18583 regno < LAST_VFP_REGNUM;
18584 regno += 2)
18586 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18587 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18589 if (count > 0)
18591 /* Workaround ARM10 VFPr1 bug. */
18592 if (count == 2 && !arm_arch6)
18593 count++;
18594 saved += count * 8;
18596 count = 0;
18598 else
18599 count++;
18601 if (count > 0)
18603 if (count == 2 && !arm_arch6)
18604 count++;
18605 saved += count * 8;
18608 return saved;
18612 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18613 everything bar the final return instruction. If simple_return is true,
18614 then do not output epilogue, because it has already been emitted in RTL. */
18615 const char *
18616 output_return_instruction (rtx operand, bool really_return, bool reverse,
18617 bool simple_return)
18619 char conditional[10];
18620 char instr[100];
18621 unsigned reg;
18622 unsigned long live_regs_mask;
18623 unsigned long func_type;
18624 arm_stack_offsets *offsets;
18626 func_type = arm_current_func_type ();
18628 if (IS_NAKED (func_type))
18629 return "";
18631 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18633 /* If this function was declared non-returning, and we have
18634 found a tail call, then we have to trust that the called
18635 function won't return. */
18636 if (really_return)
18638 rtx ops[2];
18640 /* Otherwise, trap an attempted return by aborting. */
18641 ops[0] = operand;
18642 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18643 : "abort");
18644 assemble_external_libcall (ops[1]);
18645 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18648 return "";
18651 gcc_assert (!cfun->calls_alloca || really_return);
18653 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18655 cfun->machine->return_used_this_function = 1;
18657 offsets = arm_get_frame_offsets ();
18658 live_regs_mask = offsets->saved_regs_mask;
18660 if (!simple_return && live_regs_mask)
18662 const char * return_reg;
18664 /* If we do not have any special requirements for function exit
18665 (e.g. interworking) then we can load the return address
18666 directly into the PC. Otherwise we must load it into LR. */
18667 if (really_return
18668 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18669 return_reg = reg_names[PC_REGNUM];
18670 else
18671 return_reg = reg_names[LR_REGNUM];
18673 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18675 /* There are three possible reasons for the IP register
18676 being saved. 1) a stack frame was created, in which case
18677 IP contains the old stack pointer, or 2) an ISR routine
18678 corrupted it, or 3) it was saved to align the stack on
18679 iWMMXt. In case 1, restore IP into SP, otherwise just
18680 restore IP. */
18681 if (frame_pointer_needed)
18683 live_regs_mask &= ~ (1 << IP_REGNUM);
18684 live_regs_mask |= (1 << SP_REGNUM);
18686 else
18687 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18690 /* On some ARM architectures it is faster to use LDR rather than
18691 LDM to load a single register. On other architectures, the
18692 cost is the same. In 26 bit mode, or for exception handlers,
18693 we have to use LDM to load the PC so that the CPSR is also
18694 restored. */
18695 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18696 if (live_regs_mask == (1U << reg))
18697 break;
18699 if (reg <= LAST_ARM_REGNUM
18700 && (reg != LR_REGNUM
18701 || ! really_return
18702 || ! IS_INTERRUPT (func_type)))
18704 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18705 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18707 else
18709 char *p;
18710 int first = 1;
18712 /* Generate the load multiple instruction to restore the
18713 registers. Note we can get here, even if
18714 frame_pointer_needed is true, but only if sp already
18715 points to the base of the saved core registers. */
18716 if (live_regs_mask & (1 << SP_REGNUM))
18718 unsigned HOST_WIDE_INT stack_adjust;
18720 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
18721 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
18723 if (stack_adjust && arm_arch5 && TARGET_ARM)
18724 if (TARGET_UNIFIED_ASM)
18725 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
18726 else
18727 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
18728 else
18730 /* If we can't use ldmib (SA110 bug),
18731 then try to pop r3 instead. */
18732 if (stack_adjust)
18733 live_regs_mask |= 1 << 3;
18735 if (TARGET_UNIFIED_ASM)
18736 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
18737 else
18738 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
18741 else
18742 if (TARGET_UNIFIED_ASM)
18743 sprintf (instr, "pop%s\t{", conditional);
18744 else
18745 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
18747 p = instr + strlen (instr);
18749 for (reg = 0; reg <= SP_REGNUM; reg++)
18750 if (live_regs_mask & (1 << reg))
18752 int l = strlen (reg_names[reg]);
18754 if (first)
18755 first = 0;
18756 else
18758 memcpy (p, ", ", 2);
18759 p += 2;
18762 memcpy (p, "%|", 2);
18763 memcpy (p + 2, reg_names[reg], l);
18764 p += l + 2;
18767 if (live_regs_mask & (1 << LR_REGNUM))
18769 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
18770 /* If returning from an interrupt, restore the CPSR. */
18771 if (IS_INTERRUPT (func_type))
18772 strcat (p, "^");
18774 else
18775 strcpy (p, "}");
18778 output_asm_insn (instr, & operand);
18780 /* See if we need to generate an extra instruction to
18781 perform the actual function return. */
18782 if (really_return
18783 && func_type != ARM_FT_INTERWORKED
18784 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
18786 /* The return has already been handled
18787 by loading the LR into the PC. */
18788 return "";
18792 if (really_return)
18794 switch ((int) ARM_FUNC_TYPE (func_type))
18796 case ARM_FT_ISR:
18797 case ARM_FT_FIQ:
18798 /* ??? This is wrong for unified assembly syntax. */
18799 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
18800 break;
18802 case ARM_FT_INTERWORKED:
18803 sprintf (instr, "bx%s\t%%|lr", conditional);
18804 break;
18806 case ARM_FT_EXCEPTION:
18807 /* ??? This is wrong for unified assembly syntax. */
18808 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
18809 break;
18811 default:
18812 /* Use bx if it's available. */
18813 if (arm_arch5 || arm_arch4t)
18814 sprintf (instr, "bx%s\t%%|lr", conditional);
18815 else
18816 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
18817 break;
18820 output_asm_insn (instr, & operand);
18823 return "";
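/* Editorial worked example (hedged): for an ordinary ARM-mode function that
   saved {r4, lr}, with unified syntax in use and no interworking or
   interrupt handling required, the code above emits a single

	pop	{r4, pc}

   and returns without a separate branch, because loading the saved LR value
   straight into the PC already performs the return.  A function that saved
   nothing instead falls through to the final switch and emits "bx lr" when
   the architecture supports it.  */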
18826 /* Write the function name into the code section, directly preceding
18827 the function prologue.
18829 Code will be output similar to this:
18830 t0
18831 .ascii "arm_poke_function_name", 0
18832 .align
18833 t1
18834 .word 0xff000000 + (t1 - t0)
18835 arm_poke_function_name
18836 mov ip, sp
18837 stmfd sp!, {fp, ip, lr, pc}
18838 sub fp, ip, #4
18840 When performing a stack backtrace, code can inspect the value
18841 of 'pc' stored at 'fp' + 0. If the trace function then looks
18842 at location pc - 12 and the top 8 bits are set, then we know
18843 that there is a function name embedded immediately preceding this
18844 location, whose length is ((pc[-3]) & ~0xff000000).
18846 We assume that pc is declared as a pointer to an unsigned long.
18848 It is of no benefit to output the function name if we are assembling
18849 a leaf function. These function types will not contain a stack
18850 backtrace structure, therefore it is not possible to determine the
18851 function name. */
18852 void
18853 arm_poke_function_name (FILE *stream, const char *name)
18855 unsigned long alignlength;
18856 unsigned long length;
18857 rtx x;
18859 length = strlen (name) + 1;
18860 alignlength = ROUND_UP_WORD (length);
18862 ASM_OUTPUT_ASCII (stream, name, length);
18863 ASM_OUTPUT_ALIGN (stream, 2);
18864 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
18865 assemble_aligned_integer (UNITS_PER_WORD, x);
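/* Editorial sketch (not GCC code): how a stack-walking routine might use the
   marker emitted above.  SAVED_PC is assumed to be the program-counter value
   recovered from an APCS frame; the word 12 bytes below it is the marker and
   the padded name sits immediately before that word.  */
static const char *
recover_poked_name (const unsigned long *saved_pc)
{
  unsigned long marker = saved_pc[-3];            /* the word at pc - 12 */
  if ((marker & 0xff000000) != 0xff000000)
    return 0;                                     /* no embedded name */
  return (const char *) saved_pc - 12 - (marker & 0x00ffffff);
}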
18868 /* Place some comments into the assembler stream
18869 describing the current function. */
18870 static void
18871 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
18873 unsigned long func_type;
18875 /* ??? Do we want to print some of the below anyway? */
18876 if (TARGET_THUMB1)
18877 return;
18879 /* Sanity check. */
18880 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
18882 func_type = arm_current_func_type ();
18884 switch ((int) ARM_FUNC_TYPE (func_type))
18886 default:
18887 case ARM_FT_NORMAL:
18888 break;
18889 case ARM_FT_INTERWORKED:
18890 asm_fprintf (f, "\t%@ Function supports interworking.\n");
18891 break;
18892 case ARM_FT_ISR:
18893 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
18894 break;
18895 case ARM_FT_FIQ:
18896 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
18897 break;
18898 case ARM_FT_EXCEPTION:
18899 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
18900 break;
18903 if (IS_NAKED (func_type))
18904 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18906 if (IS_VOLATILE (func_type))
18907 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
18909 if (IS_NESTED (func_type))
18910 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
18911 if (IS_STACKALIGN (func_type))
18912 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18914 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18915 crtl->args.size,
18916 crtl->args.pretend_args_size, frame_size);
18918 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18919 frame_pointer_needed,
18920 cfun->machine->uses_anonymous_args);
18922 if (cfun->machine->lr_save_eliminated)
18923 asm_fprintf (f, "\t%@ link register save eliminated.\n");
18925 if (crtl->calls_eh_return)
18926 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
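/* Editorial example (hedged): for a small normal function the annotations
   above typically appear in the assembly output as something like

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0

   with the interrupt/naked/volatile lines only present for the corresponding
   function types.  */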
18930 static void
18931 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
18932 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
18934 arm_stack_offsets *offsets;
18936 if (TARGET_THUMB1)
18938 int regno;
18940 /* Emit any call-via-reg trampolines that are needed for v4t support
18941 of call_reg and call_value_reg type insns. */
18942 for (regno = 0; regno < LR_REGNUM; regno++)
18944 rtx label = cfun->machine->call_via[regno];
18946 if (label != NULL)
18948 switch_to_section (function_section (current_function_decl));
18949 targetm.asm_out.internal_label (asm_out_file, "L",
18950 CODE_LABEL_NUMBER (label));
18951 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18955 /* ??? Probably not safe to set this here, since it assumes that a
18956 function will be emitted as assembly immediately after we generate
18957 RTL for it. This does not happen for inline functions. */
18958 cfun->machine->return_used_this_function = 0;
18960 else /* TARGET_32BIT */
18962 /* We need to take into account any stack-frame rounding. */
18963 offsets = arm_get_frame_offsets ();
18965 gcc_assert (!use_return_insn (FALSE, NULL)
18966 || (cfun->machine->return_used_this_function != 0)
18967 || offsets->saved_regs == offsets->outgoing_args
18968 || frame_pointer_needed);
18970 /* Reset the ARM-specific per-function variables. */
18971 after_arm_reorg = 0;
18975 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18976 STR and STRD. If an even number of registers is being pushed, an
18977 STRD pattern is created for each register pair. If an
18978 odd number of registers is pushed, emit an initial STR followed by
18979 as many STRD instructions as are needed. This works best when the
18980 stack is initially 64-bit aligned (the normal case), since it
18981 ensures that each STRD is also 64-bit aligned. */
18982 static void
18983 thumb2_emit_strd_push (unsigned long saved_regs_mask)
18985 int num_regs = 0;
18986 int i;
18987 int regno;
18988 rtx par = NULL_RTX;
18989 rtx dwarf = NULL_RTX;
18990 rtx tmp;
18991 bool first = true;
18993 num_regs = bit_count (saved_regs_mask);
18995 /* Must be at least one register to save, and can't save SP or PC. */
18996 gcc_assert (num_regs > 0 && num_regs <= 14);
18997 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
18998 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19000 /* Create sequence for DWARF info. All the frame-related data for
19001 debugging is held in this wrapper. */
19002 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19004 /* Describe the stack adjustment. */
19005 tmp = gen_rtx_SET (VOIDmode,
19006 stack_pointer_rtx,
19007 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19008 RTX_FRAME_RELATED_P (tmp) = 1;
19009 XVECEXP (dwarf, 0, 0) = tmp;
19011 /* Find the first register. */
19012 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19013 continue;
19015 i = 0;
19017 /* If there's an odd number of registers to push, start off by
19018 pushing a single register. This ensures that subsequent strd
19019 operations are dword aligned (assuming that SP was originally
19020 64-bit aligned). */
19021 if ((num_regs & 1) != 0)
19023 rtx reg, mem, insn;
19025 reg = gen_rtx_REG (SImode, regno);
19026 if (num_regs == 1)
19027 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19028 stack_pointer_rtx));
19029 else
19030 mem = gen_frame_mem (Pmode,
19031 gen_rtx_PRE_MODIFY
19032 (Pmode, stack_pointer_rtx,
19033 plus_constant (Pmode, stack_pointer_rtx,
19034 -4 * num_regs)));
19036 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19037 RTX_FRAME_RELATED_P (tmp) = 1;
19038 insn = emit_insn (tmp);
19039 RTX_FRAME_RELATED_P (insn) = 1;
19040 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19041 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19042 reg);
19043 RTX_FRAME_RELATED_P (tmp) = 1;
19044 i++;
19045 regno++;
19046 XVECEXP (dwarf, 0, i) = tmp;
19047 first = false;
19050 while (i < num_regs)
19051 if (saved_regs_mask & (1 << regno))
19053 rtx reg1, reg2, mem1, mem2;
19054 rtx tmp0, tmp1, tmp2;
19055 int regno2;
19057 /* Find the register to pair with this one. */
19058 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19059 regno2++)
19060 continue;
19062 reg1 = gen_rtx_REG (SImode, regno);
19063 reg2 = gen_rtx_REG (SImode, regno2);
19065 if (first)
19067 rtx insn;
19069 first = false;
19070 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19071 stack_pointer_rtx,
19072 -4 * num_regs));
19073 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19074 stack_pointer_rtx,
19075 -4 * (num_regs - 1)));
19076 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19077 plus_constant (Pmode, stack_pointer_rtx,
19078 -4 * (num_regs)));
19079 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19080 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19081 RTX_FRAME_RELATED_P (tmp0) = 1;
19082 RTX_FRAME_RELATED_P (tmp1) = 1;
19083 RTX_FRAME_RELATED_P (tmp2) = 1;
19084 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19085 XVECEXP (par, 0, 0) = tmp0;
19086 XVECEXP (par, 0, 1) = tmp1;
19087 XVECEXP (par, 0, 2) = tmp2;
19088 insn = emit_insn (par);
19089 RTX_FRAME_RELATED_P (insn) = 1;
19090 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19092 else
19094 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19095 stack_pointer_rtx,
19096 4 * i));
19097 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19098 stack_pointer_rtx,
19099 4 * (i + 1)));
19100 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19101 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19102 RTX_FRAME_RELATED_P (tmp1) = 1;
19103 RTX_FRAME_RELATED_P (tmp2) = 1;
19104 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19105 XVECEXP (par, 0, 0) = tmp1;
19106 XVECEXP (par, 0, 1) = tmp2;
19107 emit_insn (par);
19110 /* Create unwind information. This is an approximation. */
19111 tmp1 = gen_rtx_SET (VOIDmode,
19112 gen_frame_mem (Pmode,
19113 plus_constant (Pmode,
19114 stack_pointer_rtx,
19115 4 * i)),
19116 reg1);
19117 tmp2 = gen_rtx_SET (VOIDmode,
19118 gen_frame_mem (Pmode,
19119 plus_constant (Pmode,
19120 stack_pointer_rtx,
19121 4 * (i + 1))),
19122 reg2);
19124 RTX_FRAME_RELATED_P (tmp1) = 1;
19125 RTX_FRAME_RELATED_P (tmp2) = 1;
19126 XVECEXP (dwarf, 0, i + 1) = tmp1;
19127 XVECEXP (dwarf, 0, i + 2) = tmp2;
19128 i += 2;
19129 regno = regno2 + 1;
19131 else
19132 regno++;
19134 return;
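/* Editorial worked example (hedged): for SAVED_REGS_MASK covering
   {r4, r5, r6} the register count is odd, so the code above first emits a
   single store with writeback that also allocates the whole 12-byte block,
   followed by one STRD for the remaining pair, roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   which keeps the STRD doubleword-aligned provided SP started out 64-bit
   aligned.  */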
19137 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19138 whenever possible, otherwise it emits single-word stores. The first store
19139 also allocates stack space for all saved registers, using pre-indexed
19140 addressing with writeback. All other stores use offset addressing. If no STRD
19141 can be emitted, this function emits a sequence of single-word stores rather
19142 than an STM, because single-word stores give the scheduler more freedom
19143 and can still be turned into an STM by peephole optimizations. */
19144 static void
19145 arm_emit_strd_push (unsigned long saved_regs_mask)
19147 int num_regs = 0;
19148 int i, j, dwarf_index = 0;
19149 int offset = 0;
19150 rtx dwarf = NULL_RTX;
19151 rtx insn = NULL_RTX;
19152 rtx tmp, mem;
19154 /* TODO: A more efficient code can be emitted by changing the
19155 layout, e.g., first push all pairs that can use STRD to keep the
19156 stack aligned, and then push all other registers. */
19157 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19158 if (saved_regs_mask & (1 << i))
19159 num_regs++;
19161 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19162 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19163 gcc_assert (num_regs > 0);
19165 /* Create sequence for DWARF info. */
19166 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19168 /* For dwarf info, we generate explicit stack update. */
19169 tmp = gen_rtx_SET (VOIDmode,
19170 stack_pointer_rtx,
19171 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19172 RTX_FRAME_RELATED_P (tmp) = 1;
19173 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19175 /* Save registers. */
19176 offset = - 4 * num_regs;
19177 j = 0;
19178 while (j <= LAST_ARM_REGNUM)
19179 if (saved_regs_mask & (1 << j))
19181 if ((j % 2 == 0)
19182 && (saved_regs_mask & (1 << (j + 1))))
19184 /* The current register and the next register form a register pair
19185 for which STRD can be generated. */
19186 if (offset < 0)
19188 /* Allocate stack space for all saved registers. */
19189 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19190 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19191 mem = gen_frame_mem (DImode, tmp);
19192 offset = 0;
19194 else if (offset > 0)
19195 mem = gen_frame_mem (DImode,
19196 plus_constant (Pmode,
19197 stack_pointer_rtx,
19198 offset));
19199 else
19200 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19202 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19203 RTX_FRAME_RELATED_P (tmp) = 1;
19204 tmp = emit_insn (tmp);
19206 /* Record the first store insn. */
19207 if (dwarf_index == 1)
19208 insn = tmp;
19210 /* Generate dwarf info. */
19211 mem = gen_frame_mem (SImode,
19212 plus_constant (Pmode,
19213 stack_pointer_rtx,
19214 offset));
19215 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19216 RTX_FRAME_RELATED_P (tmp) = 1;
19217 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19219 mem = gen_frame_mem (SImode,
19220 plus_constant (Pmode,
19221 stack_pointer_rtx,
19222 offset + 4));
19223 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19224 RTX_FRAME_RELATED_P (tmp) = 1;
19225 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19227 offset += 8;
19228 j += 2;
19230 else
19232 /* Emit a single word store. */
19233 if (offset < 0)
19235 /* Allocate stack space for all saved registers. */
19236 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19237 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19238 mem = gen_frame_mem (SImode, tmp);
19239 offset = 0;
19241 else if (offset > 0)
19242 mem = gen_frame_mem (SImode,
19243 plus_constant (Pmode,
19244 stack_pointer_rtx,
19245 offset));
19246 else
19247 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19249 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19250 RTX_FRAME_RELATED_P (tmp) = 1;
19251 tmp = emit_insn (tmp);
19253 /* Record the first store insn. */
19254 if (dwarf_index == 1)
19255 insn = tmp;
19257 /* Generate dwarf info. */
19258 mem = gen_frame_mem (SImode,
19259 plus_constant(Pmode,
19260 stack_pointer_rtx,
19261 offset));
19262 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19263 RTX_FRAME_RELATED_P (tmp) = 1;
19264 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19266 offset += 4;
19267 j += 1;
19270 else
19271 j++;
19273 /* Attach dwarf info to the first insn we generate. */
19274 gcc_assert (insn != NULL_RTX);
19275 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19276 RTX_FRAME_RELATED_P (insn) = 1;
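/* Editorial worked example (hedged): for SAVED_REGS_MASK covering
   {r4, r5, r7} the loop above pairs r4/r5 (an even-numbered register with
   its successor) and falls back to a single-word store for r7, roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store pre-decrements SP by the full 12 bytes.  */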
19279 /* Generate and emit an insn that we will recognize as a push_multi.
19280 Unfortunately, since this insn does not reflect very well the actual
19281 semantics of the operation, we need to annotate the insn for the benefit
19282 of DWARF2 frame unwind information. */
19283 static rtx
19284 emit_multi_reg_push (unsigned long mask)
19286 int num_regs = 0;
19287 int num_dwarf_regs;
19288 int i, j;
19289 rtx par;
19290 rtx dwarf;
19291 int dwarf_par_index;
19292 rtx tmp, reg;
19294 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19295 if (mask & (1 << i))
19296 num_regs++;
19298 gcc_assert (num_regs && num_regs <= 16);
19300 /* We don't record the PC in the dwarf frame information. */
19301 num_dwarf_regs = num_regs;
19302 if (mask & (1 << PC_REGNUM))
19303 num_dwarf_regs--;
19305 /* For the body of the insn we are going to generate an UNSPEC in
19306 parallel with several USEs. This allows the insn to be recognized
19307 by the push_multi pattern in the arm.md file.
19309 The body of the insn looks something like this:
19311 (parallel [
19312 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19313 (const_int:SI <num>)))
19314 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19315 (use (reg:SI XX))
19316 (use (reg:SI YY))
19320 For the frame note however, we try to be more explicit and actually
19321 show each register being stored into the stack frame, plus a (single)
19322 decrement of the stack pointer. We do it this way in order to be
19323 friendly to the stack unwinding code, which only wants to see a single
19324 stack decrement per instruction. The RTL we generate for the note looks
19325 something like this:
19327 (sequence [
19328 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19329 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19330 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19331 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19335 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19336 instead we'd have a parallel expression detailing all
19337 the stores to the various memory addresses so that debug
19338 information is more up-to-date. Remember however while writing
19339 this to take care of the constraints with the push instruction.
19341 Note also that this has to be taken care of for the VFP registers.
19343 For more see PR43399. */
19345 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19346 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19347 dwarf_par_index = 1;
19349 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19351 if (mask & (1 << i))
19353 reg = gen_rtx_REG (SImode, i);
19355 XVECEXP (par, 0, 0)
19356 = gen_rtx_SET (VOIDmode,
19357 gen_frame_mem
19358 (BLKmode,
19359 gen_rtx_PRE_MODIFY (Pmode,
19360 stack_pointer_rtx,
19361 plus_constant
19362 (Pmode, stack_pointer_rtx,
19363 -4 * num_regs))
19365 gen_rtx_UNSPEC (BLKmode,
19366 gen_rtvec (1, reg),
19367 UNSPEC_PUSH_MULT));
19369 if (i != PC_REGNUM)
19371 tmp = gen_rtx_SET (VOIDmode,
19372 gen_frame_mem (SImode, stack_pointer_rtx),
19373 reg);
19374 RTX_FRAME_RELATED_P (tmp) = 1;
19375 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19376 dwarf_par_index++;
19379 break;
19383 for (j = 1, i++; j < num_regs; i++)
19385 if (mask & (1 << i))
19387 reg = gen_rtx_REG (SImode, i);
19389 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19391 if (i != PC_REGNUM)
19393 tmp
19394 = gen_rtx_SET (VOIDmode,
19395 gen_frame_mem
19396 (SImode,
19397 plus_constant (Pmode, stack_pointer_rtx,
19398 4 * j)),
19399 reg);
19400 RTX_FRAME_RELATED_P (tmp) = 1;
19401 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19404 j++;
19408 par = emit_insn (par);
19410 tmp = gen_rtx_SET (VOIDmode,
19411 stack_pointer_rtx,
19412 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19413 RTX_FRAME_RELATED_P (tmp) = 1;
19414 XVECEXP (dwarf, 0, 0) = tmp;
19416 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19418 return par;
19421 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19422 SIZE is the offset to be adjusted.
19423 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19424 static void
19425 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19427 rtx dwarf;
19429 RTX_FRAME_RELATED_P (insn) = 1;
19430 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19431 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19434 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19435 SAVED_REGS_MASK shows which registers need to be restored.
19437 Unfortunately, since this insn does not reflect very well the actual
19438 semantics of the operation, we need to annotate the insn for the benefit
19439 of DWARF2 frame unwind information. */
19440 static void
19441 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19443 int num_regs = 0;
19444 int i, j;
19445 rtx par;
19446 rtx dwarf = NULL_RTX;
19447 rtx tmp, reg;
19448 bool return_in_pc;
19449 int offset_adj;
19450 int emit_update;
19452 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19453 offset_adj = return_in_pc ? 1 : 0;
19454 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19455 if (saved_regs_mask & (1 << i))
19456 num_regs++;
19458 gcc_assert (num_regs && num_regs <= 16);
19460 /* If SP is in reglist, then we don't emit an SP update insn. */
19461 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19463 /* The parallel needs to hold num_regs SETs
19464 and one SET for the stack update. */
19465 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19467 if (return_in_pc)
19469 tmp = ret_rtx;
19470 XVECEXP (par, 0, 0) = tmp;
19473 if (emit_update)
19475 /* Increment the stack pointer, based on there being
19476 num_regs 4-byte registers to restore. */
19477 tmp = gen_rtx_SET (VOIDmode,
19478 stack_pointer_rtx,
19479 plus_constant (Pmode,
19480 stack_pointer_rtx,
19481 4 * num_regs));
19482 RTX_FRAME_RELATED_P (tmp) = 1;
19483 XVECEXP (par, 0, offset_adj) = tmp;
19486 /* Now restore every reg, which may include PC. */
19487 for (j = 0, i = 0; j < num_regs; i++)
19488 if (saved_regs_mask & (1 << i))
19490 reg = gen_rtx_REG (SImode, i);
19491 if ((num_regs == 1) && emit_update && !return_in_pc)
19493 /* Emit single load with writeback. */
19494 tmp = gen_frame_mem (SImode,
19495 gen_rtx_POST_INC (Pmode,
19496 stack_pointer_rtx));
19497 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19498 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19499 return;
19502 tmp = gen_rtx_SET (VOIDmode,
19503 reg,
19504 gen_frame_mem
19505 (SImode,
19506 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19507 RTX_FRAME_RELATED_P (tmp) = 1;
19508 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19510 /* We need to maintain a sequence for DWARF info too. As dwarf info
19511 should not have PC, skip PC. */
19512 if (i != PC_REGNUM)
19513 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19515 j++;
19518 if (return_in_pc)
19519 par = emit_jump_insn (par);
19520 else
19521 par = emit_insn (par);
19523 REG_NOTES (par) = dwarf;
19524 if (!return_in_pc)
19525 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19526 stack_pointer_rtx, stack_pointer_rtx);
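/* Editorial worked example (hedged): for SAVED_REGS_MASK covering
   {r4, r5, pc} the PARALLEL built above holds a return, an SP adjustment of
   +12 and three loads from [sp], [sp, #4] and [sp, #8]; it is emitted as a
   jump insn and is expected to print as something like "pop {r4, r5, pc}",
   while the DWARF restore notes cover only r4 and r5, never the PC.  */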
19529 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19530 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19532 Unfortunately, since this insn does not reflect very well the actual
19533 semantics of the operation, we need to annotate the insn for the benefit
19534 of DWARF2 frame unwind information. */
19535 static void
19536 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19538 int i, j;
19539 rtx par;
19540 rtx dwarf = NULL_RTX;
19541 rtx tmp, reg;
19543 gcc_assert (num_regs && num_regs <= 32);
19545 /* Workaround ARM10 VFPr1 bug. */
19546 if (num_regs == 2 && !arm_arch6)
19548 if (first_reg == 15)
19549 first_reg--;
19551 num_regs++;
19554 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19555 there could be up to 32 D-registers to restore.
19556 If there are more than 16 D-registers, make two recursive calls,
19557 each of which emits one pop_multi instruction. */
19558 if (num_regs > 16)
19560 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19561 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19562 return;
19565 /* The parallel needs to hold num_regs SETs
19566 and one SET for the stack update. */
19567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19569 /* Increment the stack pointer, based on there being
19570 num_regs 8-byte registers to restore. */
19571 tmp = gen_rtx_SET (VOIDmode,
19572 base_reg,
19573 plus_constant (Pmode, base_reg, 8 * num_regs));
19574 RTX_FRAME_RELATED_P (tmp) = 1;
19575 XVECEXP (par, 0, 0) = tmp;
19577 /* Now show every reg that will be restored, using a SET for each. */
19578 for (j = 0, i=first_reg; j < num_regs; i += 2)
19580 reg = gen_rtx_REG (DFmode, i);
19582 tmp = gen_rtx_SET (VOIDmode,
19583 reg,
19584 gen_frame_mem
19585 (DFmode,
19586 plus_constant (Pmode, base_reg, 8 * j)));
19587 RTX_FRAME_RELATED_P (tmp) = 1;
19588 XVECEXP (par, 0, j + 1) = tmp;
19590 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19592 j++;
19595 par = emit_insn (par);
19596 REG_NOTES (par) = dwarf;
19598 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19599 base_reg, base_reg);
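/* Illustrative example (editorial note, not part of the original source):
   restoring four D registers starting at d8 builds a PARALLEL whose first
   element advances BASE_REG by 32 bytes and whose remaining elements load
   d8..d11 from consecutive 8-byte slots, i.e. a single "vldm".  A request
   for more than 16 D registers is handled by the recursive split above, and
   the ARM10 VFPr1 workaround turns a 2-register pop into a 3-register one
   on pre-v6 cores.  */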
19602 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
19603 even number of registers is being popped, multiple LDRD patterns are created for
19604 all register pairs. If an odd number of registers is popped, the last register
19605 is loaded using an LDR pattern. */
19606 static void
19607 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19609 int num_regs = 0;
19610 int i, j;
19611 rtx par = NULL_RTX;
19612 rtx dwarf = NULL_RTX;
19613 rtx tmp, reg, tmp1;
19614 bool return_in_pc;
19616 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19617 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19618 if (saved_regs_mask & (1 << i))
19619 num_regs++;
19621 gcc_assert (num_regs && num_regs <= 16);
19623 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19624 to be popped. So, if num_regs is even, now it will become odd,
19625 and we can generate pop with PC. If num_regs is odd, it will be
19626 even now, and ldr with return can be generated for PC. */
19627 if (return_in_pc)
19628 num_regs--;
19630 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19632 /* Var j iterates over all the registers to gather all the registers in
19633 saved_regs_mask. Var i gives index of saved registers in stack frame.
19634 A PARALLEL RTX of register-pair is created here, so that pattern for
19635 LDRD can be matched. As PC is always the last register to be popped, and
19636 we have already decremented num_regs if PC is to be popped, we don't have to worry
19637 about PC in this loop. */
19638 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19639 if (saved_regs_mask & (1 << j))
19641 /* Create RTX for memory load. */
19642 reg = gen_rtx_REG (SImode, j);
19643 tmp = gen_rtx_SET (SImode,
19644 reg,
19645 gen_frame_mem (SImode,
19646 plus_constant (Pmode,
19647 stack_pointer_rtx, 4 * i)));
19648 RTX_FRAME_RELATED_P (tmp) = 1;
19650 if (i % 2 == 0)
19652 /* When saved-register index (i) is even, the RTX to be emitted is
19653 yet to be created. Hence create it first. The LDRD pattern we
19654 are generating is :
19655 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19656 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19657 where target registers need not be consecutive. */
19658 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19659 dwarf = NULL_RTX;
19662 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19663 added as 0th element and if i is odd, reg_i is added as 1st element
19664 of LDRD pattern shown above. */
19665 XVECEXP (par, 0, (i % 2)) = tmp;
19666 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19668 if ((i % 2) == 1)
19670 /* When saved-register index (i) is odd, RTXs for both the registers
19671 to be loaded are generated in above given LDRD pattern, and the
19672 pattern can be emitted now. */
19673 par = emit_insn (par);
19674 REG_NOTES (par) = dwarf;
19675 RTX_FRAME_RELATED_P (par) = 1;
19678 i++;
19681 /* If the number of registers popped is odd AND return_in_pc is false, OR the
19682 number of registers is even AND return_in_pc is true, the last register is
19683 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19684 then emit an LDR with post increment. */
19686 /* Increment the stack pointer, based on there being
19687 num_regs 4-byte registers to restore. */
19688 tmp = gen_rtx_SET (VOIDmode,
19689 stack_pointer_rtx,
19690 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19691 RTX_FRAME_RELATED_P (tmp) = 1;
19692 tmp = emit_insn (tmp);
19693 if (!return_in_pc)
19695 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19696 stack_pointer_rtx, stack_pointer_rtx);
19699 dwarf = NULL_RTX;
19701 if (((num_regs % 2) == 1 && !return_in_pc)
19702 || ((num_regs % 2) == 0 && return_in_pc))
19704 /* Scan for the single register to be popped. Skip until the saved
19705 register is found. */
19706 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19708 /* Gen LDR with post increment here. */
19709 tmp1 = gen_rtx_MEM (SImode,
19710 gen_rtx_POST_INC (SImode,
19711 stack_pointer_rtx));
19712 set_mem_alias_set (tmp1, get_frame_alias_set ());
19714 reg = gen_rtx_REG (SImode, j);
19715 tmp = gen_rtx_SET (SImode, reg, tmp1);
19716 RTX_FRAME_RELATED_P (tmp) = 1;
19717 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19719 if (return_in_pc)
19721 /* If return_in_pc, j must be PC_REGNUM. */
19722 gcc_assert (j == PC_REGNUM);
19723 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19724 XVECEXP (par, 0, 0) = ret_rtx;
19725 XVECEXP (par, 0, 1) = tmp;
19726 par = emit_jump_insn (par);
19728 else
19730 par = emit_insn (tmp);
19731 REG_NOTES (par) = dwarf;
19732 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19733 stack_pointer_rtx, stack_pointer_rtx);
19737 else if ((num_regs % 2) == 1 && return_in_pc)
19739 /* There are 2 registers to be popped. So, generate the pattern
19740 pop_multiple_with_stack_update_and_return to pop in PC. */
19741 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
19744 return;
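/* Illustrative example (editorial note, not part of the original source):
   for SAVED_REGS_MASK covering {r4, r5, r6, pc}, the code above emits
   roughly
       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       pop     {r6, pc}
   i.e. one LDRD parallel for the pair, a stack adjustment, and a final
   pop_multi (via arm_emit_multi_reg_pop) that returns through PC.  */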
19747 /* LDRD in ARM mode needs consecutive registers as operands. This function
19748 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19749 offset addressing and then generates one separate stack update. This provides
19750 more scheduling freedom, compared to writeback on every load. However,
19751 if the function returns using load into PC directly
19752 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19753 before the last load. TODO: Add a peephole optimization to recognize
19754 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19755 peephole optimization to merge the load at stack-offset zero
19756 with the stack update instruction using load with writeback
19757 in post-index addressing mode. */
19758 static void
19759 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
19761 int j = 0;
19762 int offset = 0;
19763 rtx par = NULL_RTX;
19764 rtx dwarf = NULL_RTX;
19765 rtx tmp, mem;
19767 /* Restore saved registers. */
19768 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
19769 j = 0;
19770 while (j <= LAST_ARM_REGNUM)
19771 if (saved_regs_mask & (1 << j))
19773 if ((j % 2) == 0
19774 && (saved_regs_mask & (1 << (j + 1)))
19775 && (j + 1) != PC_REGNUM)
19777 /* The current register and the next register form a register pair for which
19778 LDRD can be generated. PC is always the last register popped, and
19779 we handle it separately. */
19780 if (offset > 0)
19781 mem = gen_frame_mem (DImode,
19782 plus_constant (Pmode,
19783 stack_pointer_rtx,
19784 offset));
19785 else
19786 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19788 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
19789 tmp = emit_insn (tmp);
19790 RTX_FRAME_RELATED_P (tmp) = 1;
19792 /* Generate dwarf info. */
19794 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19795 gen_rtx_REG (SImode, j),
19796 NULL_RTX);
19797 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19798 gen_rtx_REG (SImode, j + 1),
19799 dwarf);
19801 REG_NOTES (tmp) = dwarf;
19803 offset += 8;
19804 j += 2;
19806 else if (j != PC_REGNUM)
19808 /* Emit a single word load. */
19809 if (offset > 0)
19810 mem = gen_frame_mem (SImode,
19811 plus_constant (Pmode,
19812 stack_pointer_rtx,
19813 offset));
19814 else
19815 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19817 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
19818 tmp = emit_insn (tmp);
19819 RTX_FRAME_RELATED_P (tmp) = 1;
19821 /* Generate dwarf info. */
19822 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
19823 gen_rtx_REG (SImode, j),
19824 NULL_RTX);
19826 offset += 4;
19827 j += 1;
19829 else /* j == PC_REGNUM */
19830 j++;
19832 else
19833 j++;
19835 /* Update the stack. */
19836 if (offset > 0)
19838 tmp = gen_rtx_SET (Pmode,
19839 stack_pointer_rtx,
19840 plus_constant (Pmode,
19841 stack_pointer_rtx,
19842 offset));
19843 tmp = emit_insn (tmp);
19844 arm_add_cfa_adjust_cfa_note (tmp, offset,
19845 stack_pointer_rtx, stack_pointer_rtx);
19846 offset = 0;
19849 if (saved_regs_mask & (1 << PC_REGNUM))
19851 /* Only PC is to be popped. */
19852 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19853 XVECEXP (par, 0, 0) = ret_rtx;
19854 tmp = gen_rtx_SET (SImode,
19855 gen_rtx_REG (SImode, PC_REGNUM),
19856 gen_frame_mem (SImode,
19857 gen_rtx_POST_INC (SImode,
19858 stack_pointer_rtx)));
19859 RTX_FRAME_RELATED_P (tmp) = 1;
19860 XVECEXP (par, 0, 1) = tmp;
19861 par = emit_jump_insn (par);
19863 /* Generate dwarf info. */
19864 dwarf = alloc_reg_note (REG_CFA_RESTORE,
19865 gen_rtx_REG (SImode, PC_REGNUM),
19866 NULL_RTX);
19867 REG_NOTES (par) = dwarf;
19868 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
19869 stack_pointer_rtx, stack_pointer_rtx);
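/* Illustrative example (editorial note, not part of the original source):
   for SAVED_REGS_MASK covering {r4, r5, r6, pc} in ARM state, the code
   above emits roughly
       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12
       ldr     pc, [sp], #4
   with the last load doubling as the return and carrying its own CFA
   adjustment note.  */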
19873 /* Calculate the size of the return value that is passed in registers. */
19874 static unsigned
19875 arm_size_return_regs (void)
19877 enum machine_mode mode;
19879 if (crtl->return_rtx != 0)
19880 mode = GET_MODE (crtl->return_rtx);
19881 else
19882 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19884 return GET_MODE_SIZE (mode);
19887 /* Return true if the current function needs to save/restore LR. */
19888 static bool
19889 thumb_force_lr_save (void)
19891 return !cfun->machine->lr_save_eliminated
19892 && (!leaf_function_p ()
19893 || thumb_far_jump_used_p ()
19894 || df_regs_ever_live_p (LR_REGNUM));
19897 /* Return true if CALL is an indirect tail call. In that case we do not
19898 know whether r3 will be available, since it may be needed to hold the
19899 call address. */
19900 static bool
19901 is_indirect_tailcall_p (rtx call)
19903 rtx pat = PATTERN (call);
19905 /* Indirect tail call. */
19906 pat = XVECEXP (pat, 0, 0);
19907 if (GET_CODE (pat) == SET)
19908 pat = SET_SRC (pat);
19910 pat = XEXP (XEXP (pat, 0), 0);
19911 return REG_P (pat);
19914 /* Return true if r3 is used by any of the tail call insns in the
19915 current function. */
19916 static bool
19917 any_sibcall_could_use_r3 (void)
19919 edge_iterator ei;
19920 edge e;
19922 if (!crtl->tail_call_emit)
19923 return false;
19924 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
19925 if (e->flags & EDGE_SIBCALL)
19927 rtx call = BB_END (e->src);
19928 if (!CALL_P (call))
19929 call = prev_nonnote_nondebug_insn (call);
19930 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
19931 if (find_regno_fusage (call, USE, 3)
19932 || is_indirect_tailcall_p (call))
19933 return true;
19935 return false;
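/* Editorial note (not part of the original source): the result of this
   check feeds arm_get_frame_offsets below; r3 is only pressed into service
   as an extra push/pop register for stack alignment when no sibling call
   could need it, either explicitly or as the address of an indirect tail
   call.  */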
19939 /* Compute the distance from register FROM to register TO.
19940 These can be the arg pointer (26), the soft frame pointer (25),
19941 the stack pointer (13) or the hard frame pointer (11).
19942 In thumb mode r7 is used as the soft frame pointer, if needed.
19943 Typical stack layout looks like this:
19945 old stack pointer -> | |
19946 ----
19947 | | \
19948 | | saved arguments for
19949 | | vararg functions
19950 | | /
19952 hard FP & arg pointer -> | | \
19953 | | stack
19954 | | frame
19955 | | /
19957 | | \
19958 | | call saved
19959 | | registers
19960 soft frame pointer -> | | /
19962 | | \
19963 | | local
19964 | | variables
19965 locals base pointer -> | | /
19967 | | \
19968 | | outgoing
19969 | | arguments
19970 current stack pointer -> | | /
19973 For a given function some or all of these stack components
19974 may not be needed, giving rise to the possibility of
19975 eliminating some of the registers.
19977 The values returned by this function must reflect the behavior
19978 of arm_expand_prologue() and arm_compute_save_reg_mask().
19980 The sign of the number returned reflects the direction of stack
19981 growth, so the values are positive for all eliminations except
19982 from the soft frame pointer to the hard frame pointer.
19984 SFP may point just inside the local variables block to ensure correct
19985 alignment. */
19988 /* Calculate stack offsets. These are used to calculate register elimination
19989 offsets and in prologue/epilogue code. Also calculates which registers
19990 should be saved. */
19992 static arm_stack_offsets *
19993 arm_get_frame_offsets (void)
19995 struct arm_stack_offsets *offsets;
19996 unsigned long func_type;
19997 int leaf;
19998 int saved;
19999 int core_saved;
20000 HOST_WIDE_INT frame_size;
20001 int i;
20003 offsets = &cfun->machine->stack_offsets;
20005 /* We need to know if we are a leaf function. Unfortunately, it
20006 is possible to be called after start_sequence has been called,
20007 which causes get_insns to return the insns for the sequence,
20008 not the function, which will cause leaf_function_p to return
20009 the incorrect result.
20011 However, we only need to know about leaf functions once reload has completed, and the
20012 frame size cannot be changed after that time, so we can safely
20013 use the cached value. */
20015 if (reload_completed)
20016 return offsets;
20018 /* Initially this is the size of the local variables. It will be translated
20019 into an offset once we have determined the size of preceding data. */
20020 frame_size = ROUND_UP_WORD (get_frame_size ());
20022 leaf = leaf_function_p ();
20024 /* Space for variadic functions. */
20025 offsets->saved_args = crtl->args.pretend_args_size;
20027 /* In Thumb mode this is incorrect, but never used. */
20028 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
20029 arm_compute_static_chain_stack_bytes();
20031 if (TARGET_32BIT)
20033 unsigned int regno;
20035 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20036 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20037 saved = core_saved;
20039 /* We know that SP will be doubleword aligned on entry, and we must
20040 preserve that condition at any subroutine call. We also require the
20041 soft frame pointer to be doubleword aligned. */
20043 if (TARGET_REALLY_IWMMXT)
20045 /* Check for the call-saved iWMMXt registers. */
20046 for (regno = FIRST_IWMMXT_REGNUM;
20047 regno <= LAST_IWMMXT_REGNUM;
20048 regno++)
20049 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20050 saved += 8;
20053 func_type = arm_current_func_type ();
20054 /* Space for saved VFP registers. */
20055 if (! IS_VOLATILE (func_type)
20056 && TARGET_HARD_FLOAT && TARGET_VFP)
20057 saved += arm_get_vfp_saved_size ();
20059 else /* TARGET_THUMB1 */
20061 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20062 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20063 saved = core_saved;
20064 if (TARGET_BACKTRACE)
20065 saved += 16;
20068 /* Saved registers include the stack frame. */
20069 offsets->saved_regs = offsets->saved_args + saved +
20070 arm_compute_static_chain_stack_bytes();
20071 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20072 /* A leaf function does not need any stack alignment if it has nothing
20073 on the stack. */
20074 if (leaf && frame_size == 0
20075 /* However if it calls alloca(), we have a dynamically allocated
20076 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20077 && ! cfun->calls_alloca)
20079 offsets->outgoing_args = offsets->soft_frame;
20080 offsets->locals_base = offsets->soft_frame;
20081 return offsets;
20084 /* Ensure SFP has the correct alignment. */
20085 if (ARM_DOUBLEWORD_ALIGN
20086 && (offsets->soft_frame & 7))
20088 offsets->soft_frame += 4;
20089 /* Try to align stack by pushing an extra reg. Don't bother doing this
20090 when there is a stack frame as the alignment will be rolled into
20091 the normal stack adjustment. */
20092 if (frame_size + crtl->outgoing_args_size == 0)
20094 int reg = -1;
20096 /* If it is safe to use r3, then do so. This sometimes
20097 generates better code on Thumb-2 by avoiding the need to
20098 use 32-bit push/pop instructions. */
20099 if (! any_sibcall_could_use_r3 ()
20100 && arm_size_return_regs () <= 12
20101 && (offsets->saved_regs_mask & (1 << 3)) == 0
20102 && (TARGET_THUMB2
20103 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20105 reg = 3;
20107 else
20108 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20110 /* Avoid fixed registers; they may be changed at
20111 arbitrary times so it's unsafe to restore them
20112 during the epilogue. */
20113 if (!fixed_regs[i]
20114 && (offsets->saved_regs_mask & (1 << i)) == 0)
20116 reg = i;
20117 break;
20121 if (reg != -1)
20123 offsets->saved_regs += 4;
20124 offsets->saved_regs_mask |= (1 << reg);
20129 offsets->locals_base = offsets->soft_frame + frame_size;
20130 offsets->outgoing_args = (offsets->locals_base
20131 + crtl->outgoing_args_size);
20133 if (ARM_DOUBLEWORD_ALIGN)
20135 /* Ensure SP remains doubleword aligned. */
20136 if (offsets->outgoing_args & 7)
20137 offsets->outgoing_args += 4;
20138 gcc_assert (!(offsets->outgoing_args & 7));
20141 return offsets;
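/* Illustrative example (editorial note, not part of the original source;
   assumes no static chain, no VFP/iWMMXt saves and a zero
   CALLER_INTERWORKING_SLOT_SIZE): a 32-bit function that saves
   {r4, r5, fp, lr} (16 bytes), has 24 bytes of locals and 8 bytes of
   outgoing arguments ends up with saved_args == 0, saved_regs == 16,
   soft_frame == 16, locals_base == 40 and outgoing_args == 48, which is
   already doubleword aligned.  */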
20145 /* Calculate the relative offsets for the different stack pointers. Positive
20146 offsets are in the direction of stack growth. */
20148 HOST_WIDE_INT
20149 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20151 arm_stack_offsets *offsets;
20153 offsets = arm_get_frame_offsets ();
20155 /* OK, now we have enough information to compute the distances.
20156 There must be an entry in these switch tables for each pair
20157 of registers in ELIMINABLE_REGS, even if some of the entries
20158 seem to be redundant or useless. */
20159 switch (from)
20161 case ARG_POINTER_REGNUM:
20162 switch (to)
20164 case THUMB_HARD_FRAME_POINTER_REGNUM:
20165 return 0;
20167 case FRAME_POINTER_REGNUM:
20168 /* This is the reverse of the soft frame pointer
20169 to hard frame pointer elimination below. */
20170 return offsets->soft_frame - offsets->saved_args;
20172 case ARM_HARD_FRAME_POINTER_REGNUM:
20173 /* This is only non-zero in the case where the static chain register
20174 is stored above the frame. */
20175 return offsets->frame - offsets->saved_args - 4;
20177 case STACK_POINTER_REGNUM:
20178 /* If nothing has been pushed on the stack at all
20179 then this will return -4. This *is* correct! */
20180 return offsets->outgoing_args - (offsets->saved_args + 4);
20182 default:
20183 gcc_unreachable ();
20185 gcc_unreachable ();
20187 case FRAME_POINTER_REGNUM:
20188 switch (to)
20190 case THUMB_HARD_FRAME_POINTER_REGNUM:
20191 return 0;
20193 case ARM_HARD_FRAME_POINTER_REGNUM:
20194 /* The hard frame pointer points to the top entry in the
20195 stack frame. The soft frame pointer to the bottom entry
20196 in the stack frame. If there is no stack frame at all,
20197 then they are identical. */
20199 return offsets->frame - offsets->soft_frame;
20201 case STACK_POINTER_REGNUM:
20202 return offsets->outgoing_args - offsets->soft_frame;
20204 default:
20205 gcc_unreachable ();
20207 gcc_unreachable ();
20209 default:
20210 /* You cannot eliminate from the stack pointer.
20211 In theory you could eliminate from the hard frame
20212 pointer to the stack pointer, but this will never
20213 happen, since if a stack frame is not needed the
20214 hard frame pointer will never be used. */
20215 gcc_unreachable ();
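/* Editorial note (not part of the original source), continuing the
   illustrative numbers above: eliminating ARG_POINTER into STACK_POINTER
   would return 48 - (0 + 4) = 44, and FRAME_POINTER into STACK_POINTER
   would return 48 - 16 = 32, both measured in the direction of stack
   growth.  */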
20219 /* Given FROM and TO register numbers, say whether this elimination is
20220 allowed. Frame pointer elimination is automatically handled.
20222 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20223 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20224 pointer, we must eliminate FRAME_POINTER_REGNUM into
20225 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20226 ARG_POINTER_REGNUM. */
20228 bool
20229 arm_can_eliminate (const int from, const int to)
20231 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20232 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20233 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20234 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20235 true);
20238 /* Emit RTL to save coprocessor registers on function entry. Returns the
20239 number of bytes pushed. */
20241 static int
20242 arm_save_coproc_regs(void)
20244 int saved_size = 0;
20245 unsigned reg;
20246 unsigned start_reg;
20247 rtx insn;
20249 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20250 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20252 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20253 insn = gen_rtx_MEM (V2SImode, insn);
20254 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20255 RTX_FRAME_RELATED_P (insn) = 1;
20256 saved_size += 8;
20259 if (TARGET_HARD_FLOAT && TARGET_VFP)
20261 start_reg = FIRST_VFP_REGNUM;
20263 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20265 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20266 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20268 if (start_reg != reg)
20269 saved_size += vfp_emit_fstmd (start_reg,
20270 (reg - start_reg) / 2);
20271 start_reg = reg + 2;
20274 if (start_reg != reg)
20275 saved_size += vfp_emit_fstmd (start_reg,
20276 (reg - start_reg) / 2);
20278 return saved_size;
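/* Illustrative example (editorial note, not part of the original source):
   if d8..d15 are live and call-saved, the loop above finds one contiguous
   run and emits a single vfp_emit_fstmd (i.e. one "vstmdb sp!" of eight
   D registers), adding 64 bytes to the returned size.  */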
20282 /* Set the Thumb frame pointer from the stack pointer. */
20284 static void
20285 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20287 HOST_WIDE_INT amount;
20288 rtx insn, dwarf;
20290 amount = offsets->outgoing_args - offsets->locals_base;
20291 if (amount < 1024)
20292 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20293 stack_pointer_rtx, GEN_INT (amount)));
20294 else
20296 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20297 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20298 expects the first two operands to be the same. */
20299 if (TARGET_THUMB2)
20301 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20302 stack_pointer_rtx,
20303 hard_frame_pointer_rtx));
20305 else
20307 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20308 hard_frame_pointer_rtx,
20309 stack_pointer_rtx));
20311 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20312 plus_constant (Pmode, stack_pointer_rtx, amount));
20313 RTX_FRAME_RELATED_P (dwarf) = 1;
20314 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20317 RTX_FRAME_RELATED_P (insn) = 1;
20320 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20321 function. */
20322 void
20323 arm_expand_prologue (void)
20325 rtx amount;
20326 rtx insn;
20327 rtx ip_rtx;
20328 unsigned long live_regs_mask;
20329 unsigned long func_type;
20330 int fp_offset = 0;
20331 int saved_pretend_args = 0;
20332 int saved_regs = 0;
20333 unsigned HOST_WIDE_INT args_to_push;
20334 arm_stack_offsets *offsets;
20336 func_type = arm_current_func_type ();
20338 /* Naked functions don't have prologues. */
20339 if (IS_NAKED (func_type))
20340 return;
20342 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20343 args_to_push = crtl->args.pretend_args_size;
20345 /* Compute which registers we will have to save onto the stack. */
20346 offsets = arm_get_frame_offsets ();
20347 live_regs_mask = offsets->saved_regs_mask;
20349 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20351 if (IS_STACKALIGN (func_type))
20353 rtx r0, r1;
20355 /* Handle a word-aligned stack pointer. We generate the following:
20357 mov r0, sp
20358 bic r1, r0, #7
20359 mov sp, r1
20360 <save and restore r0 in normal prologue/epilogue>
20361 mov sp, r0
20362 bx lr
20364 The unwinder doesn't need to know about the stack realignment.
20365 Just tell it we saved SP in r0. */
20366 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20368 r0 = gen_rtx_REG (SImode, 0);
20369 r1 = gen_rtx_REG (SImode, 1);
20371 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20372 RTX_FRAME_RELATED_P (insn) = 1;
20373 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20375 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20377 /* ??? The CFA changes here, which may cause GDB to conclude that it
20378 has entered a different function. That said, the unwind info is
20379 correct, individually, before and after this instruction because
20380 we've described the save of SP, which will override the default
20381 handling of SP as restoring from the CFA. */
20382 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20385 /* For APCS frames, if the IP register is clobbered when
20386 creating the frame, save that register in a special
20387 way. */
20388 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20390 if (IS_INTERRUPT (func_type))
20392 /* Interrupt functions must not corrupt any registers.
20393 Creating a frame pointer however, corrupts the IP
20394 register, so we must push it first. */
20395 emit_multi_reg_push (1 << IP_REGNUM);
20397 /* Do not set RTX_FRAME_RELATED_P on this insn.
20398 The dwarf stack unwinding code only wants to see one
20399 stack decrement per function, and this is not it. If
20400 this instruction is labeled as being part of the frame
20401 creation sequence then dwarf2out_frame_debug_expr will
20402 die when it encounters the assignment of IP to FP
20403 later on, since the use of SP here establishes SP as
20404 the CFA register and not IP.
20406 Anyway this instruction is not really part of the stack
20407 frame creation although it is part of the prologue. */
20409 else if (IS_NESTED (func_type))
20411 /* The static chain register is the same as the IP register
20412 used as a scratch register during stack frame creation.
20413 To get around this need to find somewhere to store IP
20414 whilst the frame is being created. We try the following
20415 places in order:
20417 1. The last argument register r3.
20418 2. A slot on the stack above the frame. (This only
20419 works if the function is not a varargs function).
20420 3. Register r3 again, after pushing the argument registers
20421 onto the stack.
20423 Note - we only need to tell the dwarf2 backend about the SP
20424 adjustment in the second variant; the static chain register
20425 doesn't need to be unwound, as it doesn't contain a value
20426 inherited from the caller. */
20428 if (!arm_r3_live_at_start_p ())
20429 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20430 else if (args_to_push == 0)
20432 rtx dwarf;
20434 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20435 saved_regs += 4;
20437 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
20438 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
20439 fp_offset = 4;
20441 /* Just tell the dwarf backend that we adjusted SP. */
20442 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20443 plus_constant (Pmode, stack_pointer_rtx,
20444 -fp_offset));
20445 RTX_FRAME_RELATED_P (insn) = 1;
20446 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20448 else
20450 /* Store the args on the stack. */
20451 if (cfun->machine->uses_anonymous_args)
20452 insn = emit_multi_reg_push
20453 ((0xf0 >> (args_to_push / 4)) & 0xf);
20454 else
20455 insn = emit_insn
20456 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20457 GEN_INT (- args_to_push)));
20459 RTX_FRAME_RELATED_P (insn) = 1;
20461 saved_pretend_args = 1;
20462 fp_offset = args_to_push;
20463 args_to_push = 0;
20465 /* Now reuse r3 to preserve IP. */
20466 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20470 insn = emit_set_insn (ip_rtx,
20471 plus_constant (Pmode, stack_pointer_rtx,
20472 fp_offset));
20473 RTX_FRAME_RELATED_P (insn) = 1;
20476 if (args_to_push)
20478 /* Push the argument registers, or reserve space for them. */
20479 if (cfun->machine->uses_anonymous_args)
20480 insn = emit_multi_reg_push
20481 ((0xf0 >> (args_to_push / 4)) & 0xf);
20482 else
20483 insn = emit_insn
20484 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20485 GEN_INT (- args_to_push)));
20486 RTX_FRAME_RELATED_P (insn) = 1;
20489 /* If this is an interrupt service routine, and the link register
20490 is going to be pushed, and we're not generating an extra
20491 push of IP (needed when a frame is needed and the frame layout is APCS),
20492 subtracting four from LR now will mean that the function return
20493 can be done with a single instruction. */
20494 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20495 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20496 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20497 && TARGET_ARM)
20499 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20501 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20504 if (live_regs_mask)
20506 saved_regs += bit_count (live_regs_mask) * 4;
20507 if (optimize_size && !frame_pointer_needed
20508 && saved_regs == offsets->saved_regs - offsets->saved_args)
20510 /* If no coprocessor registers are being pushed and we don't have
20511 to worry about a frame pointer then push extra registers to
20512 create the stack frame. This is done in a way that does not
20513 alter the frame layout, so is independent of the epilogue. */
20514 int n;
20515 int frame;
20516 n = 0;
20517 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20518 n++;
20519 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20520 if (frame && n * 4 >= frame)
20522 n = frame / 4;
20523 live_regs_mask |= (1 << n) - 1;
20524 saved_regs += frame;
20528 if (TARGET_LDRD
20529 && current_tune->prefer_ldrd_strd
20530 && !optimize_function_for_size_p (cfun))
20532 if (TARGET_THUMB2)
20534 thumb2_emit_strd_push (live_regs_mask);
20536 else if (TARGET_ARM
20537 && !TARGET_APCS_FRAME
20538 && !IS_INTERRUPT (func_type))
20540 arm_emit_strd_push (live_regs_mask);
20542 else
20544 insn = emit_multi_reg_push (live_regs_mask);
20545 RTX_FRAME_RELATED_P (insn) = 1;
20548 else
20550 insn = emit_multi_reg_push (live_regs_mask);
20551 RTX_FRAME_RELATED_P (insn) = 1;
20555 if (! IS_VOLATILE (func_type))
20556 saved_regs += arm_save_coproc_regs ();
20558 if (frame_pointer_needed && TARGET_ARM)
20560 /* Create the new frame pointer. */
20561 if (TARGET_APCS_FRAME)
20563 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20564 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20565 RTX_FRAME_RELATED_P (insn) = 1;
20567 if (IS_NESTED (func_type))
20569 /* Recover the static chain register. */
20570 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20571 insn = gen_rtx_REG (SImode, 3);
20572 else /* if (crtl->args.pretend_args_size == 0) */
20574 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20575 insn = gen_frame_mem (SImode, insn);
20577 emit_set_insn (ip_rtx, insn);
20578 /* Add a USE to stop propagate_one_insn() from barfing. */
20579 emit_insn (gen_force_register_use (ip_rtx));
20582 else
20584 insn = GEN_INT (saved_regs - 4);
20585 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20586 stack_pointer_rtx, insn));
20587 RTX_FRAME_RELATED_P (insn) = 1;
20591 if (flag_stack_usage_info)
20592 current_function_static_stack_size
20593 = offsets->outgoing_args - offsets->saved_args;
20595 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20597 /* This add can produce multiple insns for a large constant, so we
20598 need to get tricky. */
20599 rtx last = get_last_insn ();
20601 amount = GEN_INT (offsets->saved_args + saved_regs
20602 - offsets->outgoing_args);
20604 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20605 amount));
20608 last = last ? NEXT_INSN (last) : get_insns ();
20609 RTX_FRAME_RELATED_P (last) = 1;
20611 while (last != insn);
20613 /* If the frame pointer is needed, emit a special barrier that
20614 will prevent the scheduler from moving stores to the frame
20615 before the stack adjustment. */
20616 if (frame_pointer_needed)
20617 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20618 hard_frame_pointer_rtx));
20622 if (frame_pointer_needed && TARGET_THUMB2)
20623 thumb_set_frame_pointer (offsets);
20625 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20627 unsigned long mask;
20629 mask = live_regs_mask;
20630 mask &= THUMB2_WORK_REGS;
20631 if (!IS_NESTED (func_type))
20632 mask |= (1 << IP_REGNUM);
20633 arm_load_pic_register (mask);
20636 /* If we are profiling, make sure no instructions are scheduled before
20637 the call to mcount. Similarly if the user has requested no
20638 scheduling in the prolog. Similarly if we want non-call exceptions
20639 using the EABI unwinder, to prevent faulting instructions from being
20640 swapped with a stack adjustment. */
20641 if (crtl->profile || !TARGET_SCHED_PROLOG
20642 || (arm_except_unwind_info (&global_options) == UI_TARGET
20643 && cfun->can_throw_non_call_exceptions))
20644 emit_insn (gen_blockage ());
20646 /* If the link register is being kept alive, with the return address in it,
20647 then make sure that it does not get reused by the ce2 pass. */
20648 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20649 cfun->machine->lr_save_eliminated = 1;
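/* Illustrative summary (editorial note, not part of the original source):
   for a simple non-nested ARM function with live_regs_mask == {r4, lr},
   16 bytes of locals, no outgoing arguments and no frame pointer, the
   prologue above reduces to roughly
       push    {r4, lr}
       sub     sp, sp, #16
   with both insns marked RTX_FRAME_RELATED_P for the unwinder.  */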
20652 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20653 static void
20654 arm_print_condition (FILE *stream)
20656 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20658 /* Branch conversion is not implemented for Thumb-2. */
20659 if (TARGET_THUMB)
20661 output_operand_lossage ("predicated Thumb instruction");
20662 return;
20664 if (current_insn_predicate != NULL)
20666 output_operand_lossage
20667 ("predicated instruction in conditional sequence");
20668 return;
20671 fputs (arm_condition_codes[arm_current_cc], stream);
20673 else if (current_insn_predicate)
20675 enum arm_cond_code code;
20677 if (TARGET_THUMB1)
20679 output_operand_lossage ("predicated Thumb instruction");
20680 return;
20683 code = get_arm_condition_code (current_insn_predicate);
20684 fputs (arm_condition_codes[code], stream);
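/* Editorial note (not part of the original source): in ARM state, while
   the ccfsm described later in this file has conditionalized the
   instructions that follow a skipped branch on, say, NE, a template such
   as "mov%?\t%0, %1" is printed as "movne" by this routine.  */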
20689 /* If CODE is 'd', then the X is a condition operand and the instruction
20690 should only be executed if the condition is true.
20691 if CODE is 'D', then the X is a condition operand and the instruction
20692 should only be executed if the condition is false: however, if the mode
20693 of the comparison is CCFPEmode, then always execute the instruction -- we
20694 do this because in these circumstances !GE does not necessarily imply LT;
20695 in these cases the instruction pattern will take care to make sure that
20696 an instruction containing %d will follow, thereby undoing the effects of
20697 doing this instruction unconditionally.
20698 If CODE is 'N' then X is a floating point operand that must be negated
20699 before output.
20700 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20701 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20702 static void
20703 arm_print_operand (FILE *stream, rtx x, int code)
20705 switch (code)
20707 case '@':
20708 fputs (ASM_COMMENT_START, stream);
20709 return;
20711 case '_':
20712 fputs (user_label_prefix, stream);
20713 return;
20715 case '|':
20716 fputs (REGISTER_PREFIX, stream);
20717 return;
20719 case '?':
20720 arm_print_condition (stream);
20721 return;
20723 case '(':
20724 /* Nothing in unified syntax, otherwise the current condition code. */
20725 if (!TARGET_UNIFIED_ASM)
20726 arm_print_condition (stream);
20727 break;
20729 case ')':
20730 /* The current condition code in unified syntax, otherwise nothing. */
20731 if (TARGET_UNIFIED_ASM)
20732 arm_print_condition (stream);
20733 break;
20735 case '.':
20736 /* The current condition code for a condition code setting instruction.
20737 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20738 if (TARGET_UNIFIED_ASM)
20740 fputc('s', stream);
20741 arm_print_condition (stream);
20743 else
20745 arm_print_condition (stream);
20746 fputc('s', stream);
20748 return;
20750 case '!':
20751 /* If the instruction is conditionally executed then print
20752 the current condition code, otherwise print 's'. */
20753 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
20754 if (current_insn_predicate)
20755 arm_print_condition (stream);
20756 else
20757 fputc('s', stream);
20758 break;
20760 /* %# is a "break" sequence. It doesn't output anything, but is used to
20761 separate e.g. operand numbers from following text, if that text consists
20762 of further digits which we don't want to be part of the operand
20763 number. */
20764 case '#':
20765 return;
20767 case 'N':
20769 REAL_VALUE_TYPE r;
20770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
20771 r = real_value_negate (&r);
20772 fprintf (stream, "%s", fp_const_from_val (&r));
20774 return;
20776 /* An integer or symbol address without a preceding # sign. */
20777 case 'c':
20778 switch (GET_CODE (x))
20780 case CONST_INT:
20781 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
20782 break;
20784 case SYMBOL_REF:
20785 output_addr_const (stream, x);
20786 break;
20788 case CONST:
20789 if (GET_CODE (XEXP (x, 0)) == PLUS
20790 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
20792 output_addr_const (stream, x);
20793 break;
20795 /* Fall through. */
20797 default:
20798 output_operand_lossage ("Unsupported operand for code '%c'", code);
20800 return;
20802 /* An integer that we want to print in HEX. */
20803 case 'x':
20804 switch (GET_CODE (x))
20806 case CONST_INT:
20807 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
20808 break;
20810 default:
20811 output_operand_lossage ("Unsupported operand for code '%c'", code);
20813 return;
20815 case 'B':
20816 if (CONST_INT_P (x))
20818 HOST_WIDE_INT val;
20819 val = ARM_SIGN_EXTEND (~INTVAL (x));
20820 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
20822 else
20824 putc ('~', stream);
20825 output_addr_const (stream, x);
20827 return;
20829 case 'L':
20830 /* The low 16 bits of an immediate constant. */
20831 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
20832 return;
20834 case 'i':
20835 fprintf (stream, "%s", arithmetic_instr (x, 1));
20836 return;
20838 case 'I':
20839 fprintf (stream, "%s", arithmetic_instr (x, 0));
20840 return;
20842 case 'S':
20844 HOST_WIDE_INT val;
20845 const char *shift;
20847 shift = shift_op (x, &val);
20849 if (shift)
20851 fprintf (stream, ", %s ", shift);
20852 if (val == -1)
20853 arm_print_operand (stream, XEXP (x, 1), 0);
20854 else
20855 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
20858 return;
20860 /* An explanation of the 'Q', 'R' and 'H' register operands:
20862 In a pair of registers containing a DI or DF value the 'Q'
20863 operand returns the register number of the register containing
20864 the least significant part of the value. The 'R' operand returns
20865 the register number of the register containing the most
20866 significant part of the value.
20868 The 'H' operand returns the higher of the two register numbers.
20869 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20870 same as the 'Q' operand, since the most significant part of the
20871 value is held in the lower number register. The reverse is true
20872 on systems where WORDS_BIG_ENDIAN is false.
20874 The purpose of these operands is to distinguish between cases
20875 where the endian-ness of the values is important (for example
20876 when they are added together), and cases where the endian-ness
20877 is irrelevant, but the order of register operations is important.
20878 For example when loading a value from memory into a register
20879 pair, the endian-ness does not matter. Provided that the value
20880 from the lower memory address is put into the lower numbered
20881 register, and the value from the higher address is put into the
20882 higher numbered register, the load will work regardless of whether
20883 the value being loaded is big-wordian or little-wordian. The
20884 order of the two register loads can matter however, if the address
20885 of the memory location is actually held in one of the registers
20886 being overwritten by the load.
20888 The 'Q' and 'R' constraints are also available for 64-bit
20889 constants. */
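/* Editorial example (not part of the original source): on a little-endian
   target a DImode value held in r0/r1 gives %Q == r0 (least significant
   half), %R == r1 (most significant half) and %H == r1; on a big-wordian
   target %Q and %H both name r1 while %R names r0.  */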
20890 case 'Q':
20891 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20893 rtx part = gen_lowpart (SImode, x);
20894 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20895 return;
20898 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20900 output_operand_lossage ("invalid operand for code '%c'", code);
20901 return;
20904 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
20905 return;
20907 case 'R':
20908 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
20910 enum machine_mode mode = GET_MODE (x);
20911 rtx part;
20913 if (mode == VOIDmode)
20914 mode = DImode;
20915 part = gen_highpart_mode (SImode, mode, x);
20916 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
20917 return;
20920 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20922 output_operand_lossage ("invalid operand for code '%c'", code);
20923 return;
20926 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
20927 return;
20929 case 'H':
20930 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20932 output_operand_lossage ("invalid operand for code '%c'", code);
20933 return;
20936 asm_fprintf (stream, "%r", REGNO (x) + 1);
20937 return;
20939 case 'J':
20940 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20942 output_operand_lossage ("invalid operand for code '%c'", code);
20943 return;
20946 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
20947 return;
20949 case 'K':
20950 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
20952 output_operand_lossage ("invalid operand for code '%c'", code);
20953 return;
20956 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
20957 return;
20959 case 'm':
20960 asm_fprintf (stream, "%r",
20961 REG_P (XEXP (x, 0))
20962 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
20963 return;
20965 case 'M':
20966 asm_fprintf (stream, "{%r-%r}",
20967 REGNO (x),
20968 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
20969 return;
20971 /* Like 'M', but writing doubleword vector registers, for use by Neon
20972 insns. */
20973 case 'h':
20975 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
20976 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
20977 if (numregs == 1)
20978 asm_fprintf (stream, "{d%d}", regno);
20979 else
20980 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
20982 return;
20984 case 'd':
20985 /* CONST_TRUE_RTX means always -- that's the default. */
20986 if (x == const_true_rtx)
20987 return;
20989 if (!COMPARISON_P (x))
20991 output_operand_lossage ("invalid operand for code '%c'", code);
20992 return;
20995 fputs (arm_condition_codes[get_arm_condition_code (x)],
20996 stream);
20997 return;
20999 case 'D':
21000 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21001 want to do that. */
21002 if (x == const_true_rtx)
21004 output_operand_lossage ("instruction never executed");
21005 return;
21007 if (!COMPARISON_P (x))
21009 output_operand_lossage ("invalid operand for code '%c'", code);
21010 return;
21013 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21014 (get_arm_condition_code (x))],
21015 stream);
21016 return;
21018 case 's':
21019 case 'V':
21020 case 'W':
21021 case 'X':
21022 case 'Y':
21023 case 'Z':
21024 /* Former Maverick support, removed after GCC-4.7. */
21025 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21026 return;
21028 case 'U':
21029 if (!REG_P (x)
21030 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21031 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21032 /* Bad value for wCG register number. */
21034 output_operand_lossage ("invalid operand for code '%c'", code);
21035 return;
21038 else
21039 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21040 return;
21042 /* Print an iWMMXt control register name. */
21043 case 'w':
21044 if (!CONST_INT_P (x)
21045 || INTVAL (x) < 0
21046 || INTVAL (x) >= 16)
21047 /* Bad value for wC register number. */
21049 output_operand_lossage ("invalid operand for code '%c'", code);
21050 return;
21053 else
21055 static const char * wc_reg_names [16] =
21057 "wCID", "wCon", "wCSSF", "wCASF",
21058 "wC4", "wC5", "wC6", "wC7",
21059 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21060 "wC12", "wC13", "wC14", "wC15"
21063 fputs (wc_reg_names [INTVAL (x)], stream);
21065 return;
21067 /* Print the high single-precision register of a VFP double-precision
21068 register. */
21069 case 'p':
21071 int mode = GET_MODE (x);
21072 int regno;
21074 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21076 output_operand_lossage ("invalid operand for code '%c'", code);
21077 return;
21080 regno = REGNO (x);
21081 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21083 output_operand_lossage ("invalid operand for code '%c'", code);
21084 return;
21087 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21089 return;
21091 /* Print a VFP/Neon double precision or quad precision register name. */
21092 case 'P':
21093 case 'q':
21095 int mode = GET_MODE (x);
21096 int is_quad = (code == 'q');
21097 int regno;
21099 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21101 output_operand_lossage ("invalid operand for code '%c'", code);
21102 return;
21105 if (!REG_P (x)
21106 || !IS_VFP_REGNUM (REGNO (x)))
21108 output_operand_lossage ("invalid operand for code '%c'", code);
21109 return;
21112 regno = REGNO (x);
21113 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21114 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21116 output_operand_lossage ("invalid operand for code '%c'", code);
21117 return;
21120 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21121 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21123 return;
21125 /* These two codes print the low/high doubleword register of a Neon quad
21126 register, respectively. For pair-structure types, can also print
21127 low/high quadword registers. */
21128 case 'e':
21129 case 'f':
21131 int mode = GET_MODE (x);
21132 int regno;
21134 if ((GET_MODE_SIZE (mode) != 16
21135 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21137 output_operand_lossage ("invalid operand for code '%c'", code);
21138 return;
21141 regno = REGNO (x);
21142 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21144 output_operand_lossage ("invalid operand for code '%c'", code);
21145 return;
21148 if (GET_MODE_SIZE (mode) == 16)
21149 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21150 + (code == 'f' ? 1 : 0));
21151 else
21152 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21153 + (code == 'f' ? 1 : 0));
21155 return;
21157 /* Print a VFPv3 floating-point constant, represented as an integer
21158 index. */
21159 case 'G':
21161 int index = vfp3_const_double_index (x);
21162 gcc_assert (index != -1);
21163 fprintf (stream, "%d", index);
21165 return;
21167 /* Print bits representing opcode features for Neon.
21169 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21170 and polynomials as unsigned.
21172 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21174 Bit 2 is 1 for rounding functions, 0 otherwise. */
21176 /* Identify the type as 's', 'u', 'p' or 'f'. */
21177 case 'T':
21179 HOST_WIDE_INT bits = INTVAL (x);
21180 fputc ("uspf"[bits & 3], stream);
21182 return;
21184 /* Likewise, but signed and unsigned integers are both 'i'. */
21185 case 'F':
21187 HOST_WIDE_INT bits = INTVAL (x);
21188 fputc ("iipf"[bits & 3], stream);
21190 return;
21192 /* As for 'T', but emit 'u' instead of 'p'. */
21193 case 't':
21195 HOST_WIDE_INT bits = INTVAL (x);
21196 fputc ("usuf"[bits & 3], stream);
21198 return;
21200 /* Bit 2: rounding (vs none). */
21201 case 'O':
21203 HOST_WIDE_INT bits = INTVAL (x);
21204 fputs ((bits & 4) != 0 ? "r" : "", stream);
21206 return;
21208 /* Memory operand for vld1/vst1 instruction. */
21209 case 'A':
21211 rtx addr;
21212 bool postinc = FALSE;
21213 unsigned align, memsize, align_bits;
21215 gcc_assert (MEM_P (x));
21216 addr = XEXP (x, 0);
21217 if (GET_CODE (addr) == POST_INC)
21219 postinc = 1;
21220 addr = XEXP (addr, 0);
21222 asm_fprintf (stream, "[%r", REGNO (addr));
21224 /* We know the alignment of this access, so we can emit a hint in the
21225 instruction (for some alignments) as an aid to the memory subsystem
21226 of the target. */
21227 align = MEM_ALIGN (x) >> 3;
21228 memsize = MEM_SIZE (x);
21230 /* Only certain alignment specifiers are supported by the hardware. */
21231 if (memsize == 32 && (align % 32) == 0)
21232 align_bits = 256;
21233 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21234 align_bits = 128;
21235 else if (memsize >= 8 && (align % 8) == 0)
21236 align_bits = 64;
21237 else
21238 align_bits = 0;
21240 if (align_bits != 0)
21241 asm_fprintf (stream, ":%d", align_bits);
21243 asm_fprintf (stream, "]");
21245 if (postinc)
21246 fputs("!", stream);
21248 return;
21250 case 'C':
21252 rtx addr;
21254 gcc_assert (MEM_P (x));
21255 addr = XEXP (x, 0);
21256 gcc_assert (REG_P (addr));
21257 asm_fprintf (stream, "[%r]", REGNO (addr));
21259 return;
21261 /* Translate an S register number into a D register number and element index. */
21262 case 'y':
21264 int mode = GET_MODE (x);
21265 int regno;
21267 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21269 output_operand_lossage ("invalid operand for code '%c'", code);
21270 return;
21273 regno = REGNO (x);
21274 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21276 output_operand_lossage ("invalid operand for code '%c'", code);
21277 return;
21280 regno = regno - FIRST_VFP_REGNUM;
21281 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21283 return;
21285 case 'v':
21286 gcc_assert (CONST_DOUBLE_P (x));
21287 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
21288 return;
21290 /* Register specifier for vld1.16/vst1.16. Translate the S register
21291 number into a D register number and element index. */
21292 case 'z':
21294 int mode = GET_MODE (x);
21295 int regno;
21297 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21299 output_operand_lossage ("invalid operand for code '%c'", code);
21300 return;
21303 regno = REGNO (x);
21304 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21306 output_operand_lossage ("invalid operand for code '%c'", code);
21307 return;
21310 regno = regno - FIRST_VFP_REGNUM;
21311 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21313 return;
21315 default:
21316 if (x == 0)
21318 output_operand_lossage ("missing operand");
21319 return;
21322 switch (GET_CODE (x))
21324 case REG:
21325 asm_fprintf (stream, "%r", REGNO (x));
21326 break;
21328 case MEM:
21329 output_memory_reference_mode = GET_MODE (x);
21330 output_address (XEXP (x, 0));
21331 break;
21333 case CONST_DOUBLE:
21334 if (TARGET_NEON)
21336 char fpstr[20];
21337 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21338 sizeof (fpstr), 0, 1);
21339 fprintf (stream, "#%s", fpstr);
21341 else
21342 fprintf (stream, "#%s", fp_immediate_constant (x));
21343 break;
21345 default:
21346 gcc_assert (GET_CODE (x) != NEG);
21347 fputc ('#', stream);
21348 if (GET_CODE (x) == HIGH)
21350 fputs (":lower16:", stream);
21351 x = XEXP (x, 0);
21354 output_addr_const (stream, x);
21355 break;
21360 /* Target hook for printing a memory address. */
21361 static void
21362 arm_print_operand_address (FILE *stream, rtx x)
21364 if (TARGET_32BIT)
21366 int is_minus = GET_CODE (x) == MINUS;
21368 if (REG_P (x))
21369 asm_fprintf (stream, "[%r]", REGNO (x));
21370 else if (GET_CODE (x) == PLUS || is_minus)
21372 rtx base = XEXP (x, 0);
21373 rtx index = XEXP (x, 1);
21374 HOST_WIDE_INT offset = 0;
21375 if (!REG_P (base)
21376 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21378 /* Ensure that BASE is a register. */
21379 /* (one of them must be). */
21380 /* Also ensure the SP is not used as an index register. */
21381 rtx temp = base;
21382 base = index;
21383 index = temp;
21385 switch (GET_CODE (index))
21387 case CONST_INT:
21388 offset = INTVAL (index);
21389 if (is_minus)
21390 offset = -offset;
21391 asm_fprintf (stream, "[%r, #%wd]",
21392 REGNO (base), offset);
21393 break;
21395 case REG:
21396 asm_fprintf (stream, "[%r, %s%r]",
21397 REGNO (base), is_minus ? "-" : "",
21398 REGNO (index));
21399 break;
21401 case MULT:
21402 case ASHIFTRT:
21403 case LSHIFTRT:
21404 case ASHIFT:
21405 case ROTATERT:
21407 asm_fprintf (stream, "[%r, %s%r",
21408 REGNO (base), is_minus ? "-" : "",
21409 REGNO (XEXP (index, 0)));
21410 arm_print_operand (stream, index, 'S');
21411 fputs ("]", stream);
21412 break;
21415 default:
21416 gcc_unreachable ();
21419 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21420 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21422 extern enum machine_mode output_memory_reference_mode;
21424 gcc_assert (REG_P (XEXP (x, 0)));
21426 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21427 asm_fprintf (stream, "[%r, #%s%d]!",
21428 REGNO (XEXP (x, 0)),
21429 GET_CODE (x) == PRE_DEC ? "-" : "",
21430 GET_MODE_SIZE (output_memory_reference_mode));
21431 else
21432 asm_fprintf (stream, "[%r], #%s%d",
21433 REGNO (XEXP (x, 0)),
21434 GET_CODE (x) == POST_DEC ? "-" : "",
21435 GET_MODE_SIZE (output_memory_reference_mode));
21437 else if (GET_CODE (x) == PRE_MODIFY)
21439 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21440 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21441 asm_fprintf (stream, "#%wd]!",
21442 INTVAL (XEXP (XEXP (x, 1), 1)));
21443 else
21444 asm_fprintf (stream, "%r]!",
21445 REGNO (XEXP (XEXP (x, 1), 1)));
21447 else if (GET_CODE (x) == POST_MODIFY)
21449 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21450 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21451 asm_fprintf (stream, "#%wd",
21452 INTVAL (XEXP (XEXP (x, 1), 1)));
21453 else
21454 asm_fprintf (stream, "%r",
21455 REGNO (XEXP (XEXP (x, 1), 1)));
21457 else output_addr_const (stream, x);
21459 else
21461 if (REG_P (x))
21462 asm_fprintf (stream, "[%r]", REGNO (x));
21463 else if (GET_CODE (x) == POST_INC)
21464 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21465 else if (GET_CODE (x) == PLUS)
21467 gcc_assert (REG_P (XEXP (x, 0)));
21468 if (CONST_INT_P (XEXP (x, 1)))
21469 asm_fprintf (stream, "[%r, #%wd]",
21470 REGNO (XEXP (x, 0)),
21471 INTVAL (XEXP (x, 1)));
21472 else
21473 asm_fprintf (stream, "[%r, %r]",
21474 REGNO (XEXP (x, 0)),
21475 REGNO (XEXP (x, 1)));
21477 else
21478 output_addr_const (stream, x);
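/* Editorial examples (not part of the original source): in 32-bit state an
   address of the form (plus (reg sp) (const_int 8)) prints as "[sp, #8]",
   and (post_inc (reg r4)) prints as "[r4], #4" for a word-sized access;
   in Thumb-1 state the same post-increment address prints as "r4!".  */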
21482 /* Target hook for indicating whether a punctuation character for
21483 TARGET_PRINT_OPERAND is valid. */
21484 static bool
21485 arm_print_operand_punct_valid_p (unsigned char code)
21487 return (code == '@' || code == '|' || code == '.'
21488 || code == '(' || code == ')' || code == '#'
21489 || (TARGET_32BIT && (code == '?'))
21490 || (TARGET_THUMB2 && (code == '!'))
21491 || (TARGET_THUMB && (code == '_')));
21494 /* Target hook for assembling integer objects. The ARM version needs to
21495 handle word-sized values specially. */
21496 static bool
21497 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21499 enum machine_mode mode;
21501 if (size == UNITS_PER_WORD && aligned_p)
21503 fputs ("\t.word\t", asm_out_file);
21504 output_addr_const (asm_out_file, x);
21506 /* Mark symbols as position independent. We only do this in the
21507 .text segment, not in the .data segment. */
21508 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21509 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21511 /* See legitimize_pic_address for an explanation of the
21512 TARGET_VXWORKS_RTP check. */
21513 if (!arm_pic_data_is_text_relative
21514 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21515 fputs ("(GOT)", asm_out_file);
21516 else
21517 fputs ("(GOTOFF)", asm_out_file);
21519 fputc ('\n', asm_out_file);
21520 return true;
21523 mode = GET_MODE (x);
21525 if (arm_vector_mode_supported_p (mode))
21527 int i, units;
21529 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21531 units = CONST_VECTOR_NUNITS (x);
21532 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21534 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21535 for (i = 0; i < units; i++)
21537 rtx elt = CONST_VECTOR_ELT (x, i);
21538 assemble_integer
21539 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21541 else
21542 for (i = 0; i < units; i++)
21544 rtx elt = CONST_VECTOR_ELT (x, i);
21545 REAL_VALUE_TYPE rval;
21547 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21549 assemble_real
21550 (rval, GET_MODE_INNER (mode),
21551 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21554 return true;
21557 return default_assemble_integer (x, size, aligned_p);
21560 static void
21561 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21563 section *s;
21565 if (!TARGET_AAPCS_BASED)
21567 (is_ctor ?
21568 default_named_section_asm_out_constructor
21569 : default_named_section_asm_out_destructor) (symbol, priority);
21570 return;
21573 /* Put these in the .init_array section, using a special relocation. */
21574 if (priority != DEFAULT_INIT_PRIORITY)
21576 char buf[18];
21577 sprintf (buf, "%s.%.5u",
21578 is_ctor ? ".init_array" : ".fini_array",
21579 priority);
21580 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21582 else if (is_ctor)
21583 s = ctors_section;
21584 else
21585 s = dtors_section;
21587 switch_to_section (s);
21588 assemble_align (POINTER_SIZE);
21589 fputs ("\t.word\t", asm_out_file);
21590 output_addr_const (asm_out_file, symbol);
21591 fputs ("(target1)\n", asm_out_file);
21594 /* Add a function to the list of static constructors. */
21596 static void
21597 arm_elf_asm_constructor (rtx symbol, int priority)
21599 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21602 /* Add a function to the list of static destructors. */
21604 static void
21605 arm_elf_asm_destructor (rtx symbol, int priority)
21607 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21610 /* A finite state machine takes care of noticing whether or not instructions
21611 can be conditionally executed, thus decreasing execution time and code
21612 size by deleting branch instructions. The fsm is controlled by
21613 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21615 /* The states of the fsm controlling condition codes are:
21616 0: normal, do nothing special
21617 1: make ASM_OUTPUT_OPCODE not output this instruction
21618 2: make ASM_OUTPUT_OPCODE not output this instruction
21619 3: make instructions conditional
21620 4: make instructions conditional
21622 State transitions (state->state by whom under condition):
21623 0 -> 1 final_prescan_insn if the `target' is a label
21624 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21625 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21626 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21627 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21628 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21629 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21630 (the target insn is arm_target_insn).
21632 If the jump clobbers the conditions then we use states 2 and 4.
21634 A similar thing can be done with conditional return insns.
21636 XXX In case the `target' is an unconditional branch, this conditionalising
21637 of the instructions always reduces code size, but not always execution
21638 time. But then, I want to reduce the code size to somewhere near what
21639 /bin/cc produces. */
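As a minimal illustration of the transformation this FSM enables (the registers and label below are hypothetical, not taken from the surrounding code):

/* A conditional branch that skips a single instruction,

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the skipped instruction is predicated on the inverse condition and
   the branch (and, once the label becomes unused, the label too) goes
   away.  */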
21641 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21642 instructions. When a COND_EXEC instruction is seen the subsequent
21643 instructions are scanned so that multiple conditional instructions can be
21644 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21645 specify the length and true/false mask for the IT block. These will be
21646 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
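A hypothetical Thumb-2 example of how arm_condexec_mask and arm_condexec_count drive the output (the instructions themselves are illustrative only): three consecutive COND_EXEC insns, the first two predicated on EQ and the third on NE, are emitted under a single IT instruction:

/*	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #4
	movne	r0, #0

   For this block arm_condexec_masklen is 3 and arm_condexec_mask is 0b011
   (bit 0 is the first, always-true instruction), which yields the "tte"
   pattern in the mnemonic.  */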
21648 /* Returns the index of the ARM condition code string in
21649 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21650 COMPARISON should be an rtx like `(eq (...) (...))'. */
21652 enum arm_cond_code
21653 maybe_get_arm_condition_code (rtx comparison)
21655 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21656 enum arm_cond_code code;
21657 enum rtx_code comp_code = GET_CODE (comparison);
21659 if (GET_MODE_CLASS (mode) != MODE_CC)
21660 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21661 XEXP (comparison, 1));
21663 switch (mode)
21665 case CC_DNEmode: code = ARM_NE; goto dominance;
21666 case CC_DEQmode: code = ARM_EQ; goto dominance;
21667 case CC_DGEmode: code = ARM_GE; goto dominance;
21668 case CC_DGTmode: code = ARM_GT; goto dominance;
21669 case CC_DLEmode: code = ARM_LE; goto dominance;
21670 case CC_DLTmode: code = ARM_LT; goto dominance;
21671 case CC_DGEUmode: code = ARM_CS; goto dominance;
21672 case CC_DGTUmode: code = ARM_HI; goto dominance;
21673 case CC_DLEUmode: code = ARM_LS; goto dominance;
21674 case CC_DLTUmode: code = ARM_CC;
21676 dominance:
21677 if (comp_code == EQ)
21678 return ARM_INVERSE_CONDITION_CODE (code);
21679 if (comp_code == NE)
21680 return code;
21681 return ARM_NV;
21683 case CC_NOOVmode:
21684 switch (comp_code)
21686 case NE: return ARM_NE;
21687 case EQ: return ARM_EQ;
21688 case GE: return ARM_PL;
21689 case LT: return ARM_MI;
21690 default: return ARM_NV;
21693 case CC_Zmode:
21694 switch (comp_code)
21696 case NE: return ARM_NE;
21697 case EQ: return ARM_EQ;
21698 default: return ARM_NV;
21701 case CC_Nmode:
21702 switch (comp_code)
21704 case NE: return ARM_MI;
21705 case EQ: return ARM_PL;
21706 default: return ARM_NV;
21709 case CCFPEmode:
21710 case CCFPmode:
21711 /* We can handle all cases except UNEQ and LTGT. */
21712 switch (comp_code)
21714 case GE: return ARM_GE;
21715 case GT: return ARM_GT;
21716 case LE: return ARM_LS;
21717 case LT: return ARM_MI;
21718 case NE: return ARM_NE;
21719 case EQ: return ARM_EQ;
21720 case ORDERED: return ARM_VC;
21721 case UNORDERED: return ARM_VS;
21722 case UNLT: return ARM_LT;
21723 case UNLE: return ARM_LE;
21724 case UNGT: return ARM_HI;
21725 case UNGE: return ARM_PL;
21726 /* UNEQ and LTGT do not have a representation. */
21727 case UNEQ: /* Fall through. */
21728 case LTGT: /* Fall through. */
21729 default: return ARM_NV;
21732 case CC_SWPmode:
21733 switch (comp_code)
21735 case NE: return ARM_NE;
21736 case EQ: return ARM_EQ;
21737 case GE: return ARM_LE;
21738 case GT: return ARM_LT;
21739 case LE: return ARM_GE;
21740 case LT: return ARM_GT;
21741 case GEU: return ARM_LS;
21742 case GTU: return ARM_CC;
21743 case LEU: return ARM_CS;
21744 case LTU: return ARM_HI;
21745 default: return ARM_NV;
21748 case CC_Cmode:
21749 switch (comp_code)
21751 case LTU: return ARM_CS;
21752 case GEU: return ARM_CC;
21753 default: return ARM_NV;
21756 case CC_CZmode:
21757 switch (comp_code)
21759 case NE: return ARM_NE;
21760 case EQ: return ARM_EQ;
21761 case GEU: return ARM_CS;
21762 case GTU: return ARM_HI;
21763 case LEU: return ARM_LS;
21764 case LTU: return ARM_CC;
21765 default: return ARM_NV;
21768 case CC_NCVmode:
21769 switch (comp_code)
21771 case GE: return ARM_GE;
21772 case LT: return ARM_LT;
21773 case GEU: return ARM_CS;
21774 case LTU: return ARM_CC;
21775 default: return ARM_NV;
21778 case CCmode:
21779 switch (comp_code)
21781 case NE: return ARM_NE;
21782 case EQ: return ARM_EQ;
21783 case GE: return ARM_GE;
21784 case GT: return ARM_GT;
21785 case LE: return ARM_LE;
21786 case LT: return ARM_LT;
21787 case GEU: return ARM_CS;
21788 case GTU: return ARM_HI;
21789 case LEU: return ARM_LS;
21790 case LTU: return ARM_CC;
21791 default: return ARM_NV;
21794 default: gcc_unreachable ();
21798 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21799 static enum arm_cond_code
21800 get_arm_condition_code (rtx comparison)
21802 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
21803 gcc_assert (code != ARM_NV);
21804 return code;
21807 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21808 instructions. */
21809 void
21810 thumb2_final_prescan_insn (rtx insn)
21812 rtx first_insn = insn;
21813 rtx body = PATTERN (insn);
21814 rtx predicate;
21815 enum arm_cond_code code;
21816 int n;
21817 int mask;
21818 int max;
21820 /* Maximum number of conditionally executed instructions in a block
21821 is the minimum of the two max values: the maximum allowed in an IT block
21822 and the maximum that is beneficial according to the cost model and tune. */
21823 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
21824 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
21826 /* Remove the previous insn from the count of insns to be output. */
21827 if (arm_condexec_count)
21828 arm_condexec_count--;
21830 /* Nothing to do if we are already inside a conditional block. */
21831 if (arm_condexec_count)
21832 return;
21834 if (GET_CODE (body) != COND_EXEC)
21835 return;
21837 /* Conditional jumps are implemented directly. */
21838 if (JUMP_P (insn))
21839 return;
21841 predicate = COND_EXEC_TEST (body);
21842 arm_current_cc = get_arm_condition_code (predicate);
21844 n = get_attr_ce_count (insn);
21845 arm_condexec_count = 1;
21846 arm_condexec_mask = (1 << n) - 1;
21847 arm_condexec_masklen = n;
21848 /* See if subsequent instructions can be combined into the same block. */
21849 for (;;)
21851 insn = next_nonnote_insn (insn);
21853 /* Jumping into the middle of an IT block is illegal, so a label or
21854 barrier terminates the block. */
21855 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
21856 break;
21858 body = PATTERN (insn);
21859 /* USE and CLOBBER aren't really insns, so just skip them. */
21860 if (GET_CODE (body) == USE
21861 || GET_CODE (body) == CLOBBER)
21862 continue;
21864 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21865 if (GET_CODE (body) != COND_EXEC)
21866 break;
21867 /* Maximum number of conditionally executed instructions in a block. */
21868 n = get_attr_ce_count (insn);
21869 if (arm_condexec_masklen + n > max)
21870 break;
21872 predicate = COND_EXEC_TEST (body);
21873 code = get_arm_condition_code (predicate);
21874 mask = (1 << n) - 1;
21875 if (arm_current_cc == code)
21876 arm_condexec_mask |= (mask << arm_condexec_masklen);
21877 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
21878 break;
21880 arm_condexec_count++;
21881 arm_condexec_masklen += n;
21883 /* A jump must be the last instruction in a conditional block. */
21884 if (JUMP_P (insn))
21885 break;
21887 /* Restore recog_data (getting the attributes of other insns can
21888 destroy this array, but final.c assumes that it remains intact
21889 across this call). */
21890 extract_constrain_insn_cached (first_insn);
21893 void
21894 arm_final_prescan_insn (rtx insn)
21896 /* BODY will hold the body of INSN. */
21897 rtx body = PATTERN (insn);
21899 /* This will be 1 if trying to repeat the trick, and things need to be
21900 reversed if it appears to fail. */
21901 int reverse = 0;
21903 /* If we start with a return insn, we only succeed if we find another one. */
21904 int seeking_return = 0;
21905 enum rtx_code return_code = UNKNOWN;
21907 /* START_INSN will hold the insn from where we start looking. This is the
21908 first insn after the following code_label if REVERSE is true. */
21909 rtx start_insn = insn;
21911 /* If in state 4, check if the target branch is reached, in order to
21912 change back to state 0. */
21913 if (arm_ccfsm_state == 4)
21915 if (insn == arm_target_insn)
21917 arm_target_insn = NULL;
21918 arm_ccfsm_state = 0;
21920 return;
21923 /* If in state 3, it is possible to repeat the trick if this insn is an
21924 unconditional branch to a label, immediately following this branch
21925 is the previous target label (which is only used once), and the label this
21926 branch jumps to is not too far off.
21927 if (arm_ccfsm_state == 3)
21929 if (simplejump_p (insn))
21931 start_insn = next_nonnote_insn (start_insn);
21932 if (BARRIER_P (start_insn))
21934 /* XXX Isn't this always a barrier? */
21935 start_insn = next_nonnote_insn (start_insn);
21937 if (LABEL_P (start_insn)
21938 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
21939 && LABEL_NUSES (start_insn) == 1)
21940 reverse = TRUE;
21941 else
21942 return;
21944 else if (ANY_RETURN_P (body))
21946 start_insn = next_nonnote_insn (start_insn);
21947 if (BARRIER_P (start_insn))
21948 start_insn = next_nonnote_insn (start_insn);
21949 if (LABEL_P (start_insn)
21950 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
21951 && LABEL_NUSES (start_insn) == 1)
21953 reverse = TRUE;
21954 seeking_return = 1;
21955 return_code = GET_CODE (body);
21957 else
21958 return;
21960 else
21961 return;
21964 gcc_assert (!arm_ccfsm_state || reverse);
21965 if (!JUMP_P (insn))
21966 return;
21968 /* This jump might be paralleled with a clobber of the condition codes;
21969 the jump should always come first. */
21970 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
21971 body = XVECEXP (body, 0, 0);
21973 if (reverse
21974 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
21975 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
21977 int insns_skipped;
21978 int fail = FALSE, succeed = FALSE;
21979 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
21980 int then_not_else = TRUE;
21981 rtx this_insn = start_insn, label = 0;
21983 /* Register the insn jumped to. */
21984 if (reverse)
21986 if (!seeking_return)
21987 label = XEXP (SET_SRC (body), 0);
21989 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
21990 label = XEXP (XEXP (SET_SRC (body), 1), 0);
21991 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
21993 label = XEXP (XEXP (SET_SRC (body), 2), 0);
21994 then_not_else = FALSE;
21996 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
21998 seeking_return = 1;
21999 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22001 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22003 seeking_return = 1;
22004 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22005 then_not_else = FALSE;
22007 else
22008 gcc_unreachable ();
22010 /* See how many insns this branch skips, and what kind of insns. If all
22011 insns are okay, and the label or unconditional branch to the same
22012 label is not too far away, succeed. */
22013 for (insns_skipped = 0;
22014 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22016 rtx scanbody;
22018 this_insn = next_nonnote_insn (this_insn);
22019 if (!this_insn)
22020 break;
22022 switch (GET_CODE (this_insn))
22024 case CODE_LABEL:
22025 /* Succeed if it is the target label, otherwise fail since
22026 control falls in from somewhere else. */
22027 if (this_insn == label)
22029 arm_ccfsm_state = 1;
22030 succeed = TRUE;
22032 else
22033 fail = TRUE;
22034 break;
22036 case BARRIER:
22037 /* Succeed if the following insn is the target label.
22038 Otherwise fail.
22039 If return insns are used then the last insn in a function
22040 will be a barrier. */
22041 this_insn = next_nonnote_insn (this_insn);
22042 if (this_insn && this_insn == label)
22044 arm_ccfsm_state = 1;
22045 succeed = TRUE;
22047 else
22048 fail = TRUE;
22049 break;
22051 case CALL_INSN:
22052 /* The AAPCS says that conditional calls should not be
22053 used since they make interworking inefficient (the
22054 linker can't transform BL<cond> into BLX). That's
22055 only a problem if the machine has BLX. */
22056 if (arm_arch5)
22058 fail = TRUE;
22059 break;
22062 /* Succeed if the following insn is the target label, or
22063 if the following two insns are a barrier and the
22064 target label. */
22065 this_insn = next_nonnote_insn (this_insn);
22066 if (this_insn && BARRIER_P (this_insn))
22067 this_insn = next_nonnote_insn (this_insn);
22069 if (this_insn && this_insn == label
22070 && insns_skipped < max_insns_skipped)
22072 arm_ccfsm_state = 1;
22073 succeed = TRUE;
22075 else
22076 fail = TRUE;
22077 break;
22079 case JUMP_INSN:
22080 /* If this is an unconditional branch to the same label, succeed.
22081 If it is to another label, do nothing. If it is conditional,
22082 fail. */
22083 /* XXX Probably, the tests for SET and the PC are
22084 unnecessary. */
22086 scanbody = PATTERN (this_insn);
22087 if (GET_CODE (scanbody) == SET
22088 && GET_CODE (SET_DEST (scanbody)) == PC)
22090 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22091 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22093 arm_ccfsm_state = 2;
22094 succeed = TRUE;
22096 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22097 fail = TRUE;
22099 /* Fail if a conditional return is undesirable (e.g. on a
22100 StrongARM), but still allow this if optimizing for size. */
22101 else if (GET_CODE (scanbody) == return_code
22102 && !use_return_insn (TRUE, NULL)
22103 && !optimize_size)
22104 fail = TRUE;
22105 else if (GET_CODE (scanbody) == return_code)
22107 arm_ccfsm_state = 2;
22108 succeed = TRUE;
22110 else if (GET_CODE (scanbody) == PARALLEL)
22112 switch (get_attr_conds (this_insn))
22114 case CONDS_NOCOND:
22115 break;
22116 default:
22117 fail = TRUE;
22118 break;
22121 else
22122 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22124 break;
22126 case INSN:
22127 /* Instructions using or affecting the condition codes make it
22128 fail. */
22129 scanbody = PATTERN (this_insn);
22130 if (!(GET_CODE (scanbody) == SET
22131 || GET_CODE (scanbody) == PARALLEL)
22132 || get_attr_conds (this_insn) != CONDS_NOCOND)
22133 fail = TRUE;
22134 break;
22136 default:
22137 break;
22140 if (succeed)
22142 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22143 arm_target_label = CODE_LABEL_NUMBER (label);
22144 else
22146 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22148 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22150 this_insn = next_nonnote_insn (this_insn);
22151 gcc_assert (!this_insn
22152 || (!BARRIER_P (this_insn)
22153 && !LABEL_P (this_insn)));
22155 if (!this_insn)
22157 /* Oh dear! We ran off the end; give up. */
22158 extract_constrain_insn_cached (insn);
22159 arm_ccfsm_state = 0;
22160 arm_target_insn = NULL;
22161 return;
22163 arm_target_insn = this_insn;
22166 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22167 what it was. */
22168 if (!reverse)
22169 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22171 if (reverse || then_not_else)
22172 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22175 /* Restore recog_data (getting the attributes of other insns can
22176 destroy this array, but final.c assumes that it remains intact
22177 across this call). */
22178 extract_constrain_insn_cached (insn);
22182 /* Output IT instructions. */
22183 void
22184 thumb2_asm_output_opcode (FILE * stream)
22186 char buff[5];
22187 int n;
22189 if (arm_condexec_mask)
22191 for (n = 0; n < arm_condexec_masklen; n++)
22192 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22193 buff[n] = 0;
22194 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22195 arm_condition_codes[arm_current_cc]);
22196 arm_condexec_mask = 0;
22200 /* Returns true if REGNO is a valid register
22201 for holding a quantity of type MODE. */
22203 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22205 if (GET_MODE_CLASS (mode) == MODE_CC)
22206 return (regno == CC_REGNUM
22207 || (TARGET_HARD_FLOAT && TARGET_VFP
22208 && regno == VFPCC_REGNUM));
22210 if (TARGET_THUMB1)
22211 /* For the Thumb we only allow values bigger than SImode in
22212 registers 0 - 6, so that there is always a second low
22213 register available to hold the upper part of the value.
22214 We probably ought to ensure that the register is the
22215 start of an even numbered register pair. */
22216 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22218 if (TARGET_HARD_FLOAT && TARGET_VFP
22219 && IS_VFP_REGNUM (regno))
22221 if (mode == SFmode || mode == SImode)
22222 return VFP_REGNO_OK_FOR_SINGLE (regno);
22224 if (mode == DFmode)
22225 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22227 /* VFP registers can hold HFmode values, but there is no point in
22228 putting them there unless we have hardware conversion insns. */
22229 if (mode == HFmode)
22230 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22232 if (TARGET_NEON)
22233 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22234 || (VALID_NEON_QREG_MODE (mode)
22235 && NEON_REGNO_OK_FOR_QUAD (regno))
22236 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22237 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22238 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22239 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22240 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22242 return FALSE;
22245 if (TARGET_REALLY_IWMMXT)
22247 if (IS_IWMMXT_GR_REGNUM (regno))
22248 return mode == SImode;
22250 if (IS_IWMMXT_REGNUM (regno))
22251 return VALID_IWMMXT_REG_MODE (mode);
22254 /* We allow almost any value to be stored in the general registers.
22255 Restrict doubleword quantities to even register pairs so that we can
22256 use ldrd. Do not allow very large Neon structure opaque modes in
22257 general registers; they would use too many. */
22258 if (regno <= LAST_ARM_REGNUM)
22259 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22260 && ARM_NUM_REGS (mode) <= 4;
22262 if (regno == FRAME_POINTER_REGNUM
22263 || regno == ARG_POINTER_REGNUM)
22264 /* We only allow integers in the fake hard registers. */
22265 return GET_MODE_CLASS (mode) == MODE_INT;
22267 return FALSE;
22270 /* Implement MODES_TIEABLE_P. */
22272 bool
22273 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22275 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22276 return true;
22278 /* We specifically want to allow elements of "structure" modes to
22279 be tieable to the structure. This more general condition allows
22280 other rarer situations too. */
22281 if (TARGET_NEON
22282 && (VALID_NEON_DREG_MODE (mode1)
22283 || VALID_NEON_QREG_MODE (mode1)
22284 || VALID_NEON_STRUCT_MODE (mode1))
22285 && (VALID_NEON_DREG_MODE (mode2)
22286 || VALID_NEON_QREG_MODE (mode2)
22287 || VALID_NEON_STRUCT_MODE (mode2)))
22288 return true;
22290 return false;
22293 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22294 not used in arm mode. */
22296 enum reg_class
22297 arm_regno_class (int regno)
22299 if (TARGET_THUMB1)
22301 if (regno == STACK_POINTER_REGNUM)
22302 return STACK_REG;
22303 if (regno == CC_REGNUM)
22304 return CC_REG;
22305 if (regno < 8)
22306 return LO_REGS;
22307 return HI_REGS;
22310 if (TARGET_THUMB2 && regno < 8)
22311 return LO_REGS;
22313 if ( regno <= LAST_ARM_REGNUM
22314 || regno == FRAME_POINTER_REGNUM
22315 || regno == ARG_POINTER_REGNUM)
22316 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22318 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22319 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22321 if (IS_VFP_REGNUM (regno))
22323 if (regno <= D7_VFP_REGNUM)
22324 return VFP_D0_D7_REGS;
22325 else if (regno <= LAST_LO_VFP_REGNUM)
22326 return VFP_LO_REGS;
22327 else
22328 return VFP_HI_REGS;
22331 if (IS_IWMMXT_REGNUM (regno))
22332 return IWMMXT_REGS;
22334 if (IS_IWMMXT_GR_REGNUM (regno))
22335 return IWMMXT_GR_REGS;
22337 return NO_REGS;
22340 /* Handle a special case when computing the offset
22341 of an argument from the frame pointer. */
22343 arm_debugger_arg_offset (int value, rtx addr)
22345 rtx insn;
22347 /* We are only interested if dbxout_parms() failed to compute the offset. */
22348 if (value != 0)
22349 return 0;
22351 /* We can only cope with the case where the address is held in a register. */
22352 if (!REG_P (addr))
22353 return 0;
22355 /* If we are using the frame pointer to point at the argument, then
22356 an offset of 0 is correct. */
22357 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22358 return 0;
22360 /* If we are using the stack pointer to point at the
22361 argument, then an offset of 0 is correct. */
22362 /* ??? Check this is consistent with thumb2 frame layout. */
22363 if ((TARGET_THUMB || !frame_pointer_needed)
22364 && REGNO (addr) == SP_REGNUM)
22365 return 0;
22367 /* Oh dear. The argument is pointed to by a register rather
22368 than being held in a register, or being stored at a known
22369 offset from the frame pointer. Since GDB only understands
22370 those two kinds of argument we must translate the address
22371 held in the register into an offset from the frame pointer.
22372 We do this by searching through the insns for the function
22373 looking to see where this register gets its value. If the
22374 register is initialized from the frame pointer plus an offset
22375 then we are in luck and we can continue, otherwise we give up.
22377 This code is exercised by producing debugging information
22378 for a function with arguments like this:
22380 double func (double a, double b, int c, double d) {return d;}
22382 Without this code the stab for parameter 'd' will be set to
22383 an offset of 0 from the frame pointer, rather than 8. */
22385 /* The if() statement says:
22387 If the insn is a normal instruction
22388 and if the insn is setting the value in a register
22389 and if the register being set is the register holding the address of the argument
22390 and if the address is computed by an addition
22391 that involves adding to a register
22392 which is the frame pointer
22393 a constant integer
22395 then... */
22397 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22399 if ( NONJUMP_INSN_P (insn)
22400 && GET_CODE (PATTERN (insn)) == SET
22401 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22402 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22403 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22404 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22405 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22408 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22410 break;
22414 if (value == 0)
22416 debug_rtx (addr);
22417 warning (0, "unable to compute real location of stacked parameter");
22418 value = 8; /* XXX magic hack */
22421 return value;
22424 typedef enum {
22425 T_V8QI,
22426 T_V4HI,
22427 T_V4HF,
22428 T_V2SI,
22429 T_V2SF,
22430 T_DI,
22431 T_V16QI,
22432 T_V8HI,
22433 T_V4SI,
22434 T_V4SF,
22435 T_V2DI,
22436 T_TI,
22437 T_EI,
22438 T_OI,
22439 T_MAX /* Size of enum. Keep last. */
22440 } neon_builtin_type_mode;
22442 #define TYPE_MODE_BIT(X) (1 << (X))
22444 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22445 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22446 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22447 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22448 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22449 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22451 #define v8qi_UP T_V8QI
22452 #define v4hi_UP T_V4HI
22453 #define v4hf_UP T_V4HF
22454 #define v2si_UP T_V2SI
22455 #define v2sf_UP T_V2SF
22456 #define di_UP T_DI
22457 #define v16qi_UP T_V16QI
22458 #define v8hi_UP T_V8HI
22459 #define v4si_UP T_V4SI
22460 #define v4sf_UP T_V4SF
22461 #define v2di_UP T_V2DI
22462 #define ti_UP T_TI
22463 #define ei_UP T_EI
22464 #define oi_UP T_OI
22466 #define UP(X) X##_UP
22468 typedef enum {
22469 NEON_BINOP,
22470 NEON_TERNOP,
22471 NEON_UNOP,
22472 NEON_GETLANE,
22473 NEON_SETLANE,
22474 NEON_CREATE,
22475 NEON_RINT,
22476 NEON_DUP,
22477 NEON_DUPLANE,
22478 NEON_COMBINE,
22479 NEON_SPLIT,
22480 NEON_LANEMUL,
22481 NEON_LANEMULL,
22482 NEON_LANEMULH,
22483 NEON_LANEMAC,
22484 NEON_SCALARMUL,
22485 NEON_SCALARMULL,
22486 NEON_SCALARMULH,
22487 NEON_SCALARMAC,
22488 NEON_CONVERT,
22489 NEON_FLOAT_WIDEN,
22490 NEON_FLOAT_NARROW,
22491 NEON_FIXCONV,
22492 NEON_SELECT,
22493 NEON_RESULTPAIR,
22494 NEON_REINTERP,
22495 NEON_VTBL,
22496 NEON_VTBX,
22497 NEON_LOAD1,
22498 NEON_LOAD1LANE,
22499 NEON_STORE1,
22500 NEON_STORE1LANE,
22501 NEON_LOADSTRUCT,
22502 NEON_LOADSTRUCTLANE,
22503 NEON_STORESTRUCT,
22504 NEON_STORESTRUCTLANE,
22505 NEON_LOGICBINOP,
22506 NEON_SHIFTINSERT,
22507 NEON_SHIFTIMM,
22508 NEON_SHIFTACC
22509 } neon_itype;
22511 typedef struct {
22512 const char *name;
22513 const neon_itype itype;
22514 const neon_builtin_type_mode mode;
22515 const enum insn_code code;
22516 unsigned int fcode;
22517 } neon_builtin_datum;
22519 #define CF(N,X) CODE_FOR_neon_##N##X
22521 #define VAR1(T, N, A) \
22522 {#N, NEON_##T, UP (A), CF (N, A), 0}
22523 #define VAR2(T, N, A, B) \
22524 VAR1 (T, N, A), \
22525 {#N, NEON_##T, UP (B), CF (N, B), 0}
22526 #define VAR3(T, N, A, B, C) \
22527 VAR2 (T, N, A, B), \
22528 {#N, NEON_##T, UP (C), CF (N, C), 0}
22529 #define VAR4(T, N, A, B, C, D) \
22530 VAR3 (T, N, A, B, C), \
22531 {#N, NEON_##T, UP (D), CF (N, D), 0}
22532 #define VAR5(T, N, A, B, C, D, E) \
22533 VAR4 (T, N, A, B, C, D), \
22534 {#N, NEON_##T, UP (E), CF (N, E), 0}
22535 #define VAR6(T, N, A, B, C, D, E, F) \
22536 VAR5 (T, N, A, B, C, D, E), \
22537 {#N, NEON_##T, UP (F), CF (N, F), 0}
22538 #define VAR7(T, N, A, B, C, D, E, F, G) \
22539 VAR6 (T, N, A, B, C, D, E, F), \
22540 {#N, NEON_##T, UP (G), CF (N, G), 0}
22541 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22542 VAR7 (T, N, A, B, C, D, E, F, G), \
22543 {#N, NEON_##T, UP (H), CF (N, H), 0}
22544 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22545 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22546 {#N, NEON_##T, UP (I), CF (N, I), 0}
22547 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22548 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22549 {#N, NEON_##T, UP (J), CF (N, J), 0}
22551 /* The NEON builtin data can be found in arm_neon_builtins.def.
22552 The mode entries in the following table correspond to the "key" type of the
22553 instruction variant, i.e. equivalent to that which would be specified after
22554 the assembler mnemonic, which usually refers to the last vector operand.
22555 (Signed/unsigned/polynomial types are not differentiated, though; they
22556 are all mapped onto the same mode for a given element size.) The modes
22557 listed per instruction should be the same as those defined for that
22558 instruction's pattern in neon.md. */
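A hypothetical illustration of how the VARn/CF macros above populate the table (the builtin name and modes below are assumed, not read from arm_neon_builtins.def):

/* An entry

     VAR2 (BINOP, vadd, v8qi, v16qi)

   would expand here to the two rows

     {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},

   and, after the macros are redefined below, to the enum values
   ARM_BUILTIN_NEON_vaddv8qi and ARM_BUILTIN_NEON_vaddv16qi in
   arm_builtins.  The fcode field is filled in later by
   arm_init_neon_builtins.  */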
22560 static neon_builtin_datum neon_builtin_data[] =
22562 #include "arm_neon_builtins.def"
22565 #undef CF
22566 #undef VAR1
22567 #undef VAR2
22568 #undef VAR3
22569 #undef VAR4
22570 #undef VAR5
22571 #undef VAR6
22572 #undef VAR7
22573 #undef VAR8
22574 #undef VAR9
22575 #undef VAR10
22577 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22578 #define VAR1(T, N, A) \
22579 CF (N, A)
22580 #define VAR2(T, N, A, B) \
22581 VAR1 (T, N, A), \
22582 CF (N, B)
22583 #define VAR3(T, N, A, B, C) \
22584 VAR2 (T, N, A, B), \
22585 CF (N, C)
22586 #define VAR4(T, N, A, B, C, D) \
22587 VAR3 (T, N, A, B, C), \
22588 CF (N, D)
22589 #define VAR5(T, N, A, B, C, D, E) \
22590 VAR4 (T, N, A, B, C, D), \
22591 CF (N, E)
22592 #define VAR6(T, N, A, B, C, D, E, F) \
22593 VAR5 (T, N, A, B, C, D, E), \
22594 CF (N, F)
22595 #define VAR7(T, N, A, B, C, D, E, F, G) \
22596 VAR6 (T, N, A, B, C, D, E, F), \
22597 CF (N, G)
22598 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22599 VAR7 (T, N, A, B, C, D, E, F, G), \
22600 CF (N, H)
22601 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22602 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22603 CF (N, I)
22604 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22605 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22606 CF (N, J)
22607 enum arm_builtins
22609 ARM_BUILTIN_GETWCGR0,
22610 ARM_BUILTIN_GETWCGR1,
22611 ARM_BUILTIN_GETWCGR2,
22612 ARM_BUILTIN_GETWCGR3,
22614 ARM_BUILTIN_SETWCGR0,
22615 ARM_BUILTIN_SETWCGR1,
22616 ARM_BUILTIN_SETWCGR2,
22617 ARM_BUILTIN_SETWCGR3,
22619 ARM_BUILTIN_WZERO,
22621 ARM_BUILTIN_WAVG2BR,
22622 ARM_BUILTIN_WAVG2HR,
22623 ARM_BUILTIN_WAVG2B,
22624 ARM_BUILTIN_WAVG2H,
22626 ARM_BUILTIN_WACCB,
22627 ARM_BUILTIN_WACCH,
22628 ARM_BUILTIN_WACCW,
22630 ARM_BUILTIN_WMACS,
22631 ARM_BUILTIN_WMACSZ,
22632 ARM_BUILTIN_WMACU,
22633 ARM_BUILTIN_WMACUZ,
22635 ARM_BUILTIN_WSADB,
22636 ARM_BUILTIN_WSADBZ,
22637 ARM_BUILTIN_WSADH,
22638 ARM_BUILTIN_WSADHZ,
22640 ARM_BUILTIN_WALIGNI,
22641 ARM_BUILTIN_WALIGNR0,
22642 ARM_BUILTIN_WALIGNR1,
22643 ARM_BUILTIN_WALIGNR2,
22644 ARM_BUILTIN_WALIGNR3,
22646 ARM_BUILTIN_TMIA,
22647 ARM_BUILTIN_TMIAPH,
22648 ARM_BUILTIN_TMIABB,
22649 ARM_BUILTIN_TMIABT,
22650 ARM_BUILTIN_TMIATB,
22651 ARM_BUILTIN_TMIATT,
22653 ARM_BUILTIN_TMOVMSKB,
22654 ARM_BUILTIN_TMOVMSKH,
22655 ARM_BUILTIN_TMOVMSKW,
22657 ARM_BUILTIN_TBCSTB,
22658 ARM_BUILTIN_TBCSTH,
22659 ARM_BUILTIN_TBCSTW,
22661 ARM_BUILTIN_WMADDS,
22662 ARM_BUILTIN_WMADDU,
22664 ARM_BUILTIN_WPACKHSS,
22665 ARM_BUILTIN_WPACKWSS,
22666 ARM_BUILTIN_WPACKDSS,
22667 ARM_BUILTIN_WPACKHUS,
22668 ARM_BUILTIN_WPACKWUS,
22669 ARM_BUILTIN_WPACKDUS,
22671 ARM_BUILTIN_WADDB,
22672 ARM_BUILTIN_WADDH,
22673 ARM_BUILTIN_WADDW,
22674 ARM_BUILTIN_WADDSSB,
22675 ARM_BUILTIN_WADDSSH,
22676 ARM_BUILTIN_WADDSSW,
22677 ARM_BUILTIN_WADDUSB,
22678 ARM_BUILTIN_WADDUSH,
22679 ARM_BUILTIN_WADDUSW,
22680 ARM_BUILTIN_WSUBB,
22681 ARM_BUILTIN_WSUBH,
22682 ARM_BUILTIN_WSUBW,
22683 ARM_BUILTIN_WSUBSSB,
22684 ARM_BUILTIN_WSUBSSH,
22685 ARM_BUILTIN_WSUBSSW,
22686 ARM_BUILTIN_WSUBUSB,
22687 ARM_BUILTIN_WSUBUSH,
22688 ARM_BUILTIN_WSUBUSW,
22690 ARM_BUILTIN_WAND,
22691 ARM_BUILTIN_WANDN,
22692 ARM_BUILTIN_WOR,
22693 ARM_BUILTIN_WXOR,
22695 ARM_BUILTIN_WCMPEQB,
22696 ARM_BUILTIN_WCMPEQH,
22697 ARM_BUILTIN_WCMPEQW,
22698 ARM_BUILTIN_WCMPGTUB,
22699 ARM_BUILTIN_WCMPGTUH,
22700 ARM_BUILTIN_WCMPGTUW,
22701 ARM_BUILTIN_WCMPGTSB,
22702 ARM_BUILTIN_WCMPGTSH,
22703 ARM_BUILTIN_WCMPGTSW,
22705 ARM_BUILTIN_TEXTRMSB,
22706 ARM_BUILTIN_TEXTRMSH,
22707 ARM_BUILTIN_TEXTRMSW,
22708 ARM_BUILTIN_TEXTRMUB,
22709 ARM_BUILTIN_TEXTRMUH,
22710 ARM_BUILTIN_TEXTRMUW,
22711 ARM_BUILTIN_TINSRB,
22712 ARM_BUILTIN_TINSRH,
22713 ARM_BUILTIN_TINSRW,
22715 ARM_BUILTIN_WMAXSW,
22716 ARM_BUILTIN_WMAXSH,
22717 ARM_BUILTIN_WMAXSB,
22718 ARM_BUILTIN_WMAXUW,
22719 ARM_BUILTIN_WMAXUH,
22720 ARM_BUILTIN_WMAXUB,
22721 ARM_BUILTIN_WMINSW,
22722 ARM_BUILTIN_WMINSH,
22723 ARM_BUILTIN_WMINSB,
22724 ARM_BUILTIN_WMINUW,
22725 ARM_BUILTIN_WMINUH,
22726 ARM_BUILTIN_WMINUB,
22728 ARM_BUILTIN_WMULUM,
22729 ARM_BUILTIN_WMULSM,
22730 ARM_BUILTIN_WMULUL,
22732 ARM_BUILTIN_PSADBH,
22733 ARM_BUILTIN_WSHUFH,
22735 ARM_BUILTIN_WSLLH,
22736 ARM_BUILTIN_WSLLW,
22737 ARM_BUILTIN_WSLLD,
22738 ARM_BUILTIN_WSRAH,
22739 ARM_BUILTIN_WSRAW,
22740 ARM_BUILTIN_WSRAD,
22741 ARM_BUILTIN_WSRLH,
22742 ARM_BUILTIN_WSRLW,
22743 ARM_BUILTIN_WSRLD,
22744 ARM_BUILTIN_WRORH,
22745 ARM_BUILTIN_WRORW,
22746 ARM_BUILTIN_WRORD,
22747 ARM_BUILTIN_WSLLHI,
22748 ARM_BUILTIN_WSLLWI,
22749 ARM_BUILTIN_WSLLDI,
22750 ARM_BUILTIN_WSRAHI,
22751 ARM_BUILTIN_WSRAWI,
22752 ARM_BUILTIN_WSRADI,
22753 ARM_BUILTIN_WSRLHI,
22754 ARM_BUILTIN_WSRLWI,
22755 ARM_BUILTIN_WSRLDI,
22756 ARM_BUILTIN_WRORHI,
22757 ARM_BUILTIN_WRORWI,
22758 ARM_BUILTIN_WRORDI,
22760 ARM_BUILTIN_WUNPCKIHB,
22761 ARM_BUILTIN_WUNPCKIHH,
22762 ARM_BUILTIN_WUNPCKIHW,
22763 ARM_BUILTIN_WUNPCKILB,
22764 ARM_BUILTIN_WUNPCKILH,
22765 ARM_BUILTIN_WUNPCKILW,
22767 ARM_BUILTIN_WUNPCKEHSB,
22768 ARM_BUILTIN_WUNPCKEHSH,
22769 ARM_BUILTIN_WUNPCKEHSW,
22770 ARM_BUILTIN_WUNPCKEHUB,
22771 ARM_BUILTIN_WUNPCKEHUH,
22772 ARM_BUILTIN_WUNPCKEHUW,
22773 ARM_BUILTIN_WUNPCKELSB,
22774 ARM_BUILTIN_WUNPCKELSH,
22775 ARM_BUILTIN_WUNPCKELSW,
22776 ARM_BUILTIN_WUNPCKELUB,
22777 ARM_BUILTIN_WUNPCKELUH,
22778 ARM_BUILTIN_WUNPCKELUW,
22780 ARM_BUILTIN_WABSB,
22781 ARM_BUILTIN_WABSH,
22782 ARM_BUILTIN_WABSW,
22784 ARM_BUILTIN_WADDSUBHX,
22785 ARM_BUILTIN_WSUBADDHX,
22787 ARM_BUILTIN_WABSDIFFB,
22788 ARM_BUILTIN_WABSDIFFH,
22789 ARM_BUILTIN_WABSDIFFW,
22791 ARM_BUILTIN_WADDCH,
22792 ARM_BUILTIN_WADDCW,
22794 ARM_BUILTIN_WAVG4,
22795 ARM_BUILTIN_WAVG4R,
22797 ARM_BUILTIN_WMADDSX,
22798 ARM_BUILTIN_WMADDUX,
22800 ARM_BUILTIN_WMADDSN,
22801 ARM_BUILTIN_WMADDUN,
22803 ARM_BUILTIN_WMULWSM,
22804 ARM_BUILTIN_WMULWUM,
22806 ARM_BUILTIN_WMULWSMR,
22807 ARM_BUILTIN_WMULWUMR,
22809 ARM_BUILTIN_WMULWL,
22811 ARM_BUILTIN_WMULSMR,
22812 ARM_BUILTIN_WMULUMR,
22814 ARM_BUILTIN_WQMULM,
22815 ARM_BUILTIN_WQMULMR,
22817 ARM_BUILTIN_WQMULWM,
22818 ARM_BUILTIN_WQMULWMR,
22820 ARM_BUILTIN_WADDBHUSM,
22821 ARM_BUILTIN_WADDBHUSL,
22823 ARM_BUILTIN_WQMIABB,
22824 ARM_BUILTIN_WQMIABT,
22825 ARM_BUILTIN_WQMIATB,
22826 ARM_BUILTIN_WQMIATT,
22828 ARM_BUILTIN_WQMIABBN,
22829 ARM_BUILTIN_WQMIABTN,
22830 ARM_BUILTIN_WQMIATBN,
22831 ARM_BUILTIN_WQMIATTN,
22833 ARM_BUILTIN_WMIABB,
22834 ARM_BUILTIN_WMIABT,
22835 ARM_BUILTIN_WMIATB,
22836 ARM_BUILTIN_WMIATT,
22838 ARM_BUILTIN_WMIABBN,
22839 ARM_BUILTIN_WMIABTN,
22840 ARM_BUILTIN_WMIATBN,
22841 ARM_BUILTIN_WMIATTN,
22843 ARM_BUILTIN_WMIAWBB,
22844 ARM_BUILTIN_WMIAWBT,
22845 ARM_BUILTIN_WMIAWTB,
22846 ARM_BUILTIN_WMIAWTT,
22848 ARM_BUILTIN_WMIAWBBN,
22849 ARM_BUILTIN_WMIAWBTN,
22850 ARM_BUILTIN_WMIAWTBN,
22851 ARM_BUILTIN_WMIAWTTN,
22853 ARM_BUILTIN_WMERGE,
22855 #include "arm_neon_builtins.def"
22857 ,ARM_BUILTIN_MAX
22860 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
22862 #undef CF
22863 #undef VAR1
22864 #undef VAR2
22865 #undef VAR3
22866 #undef VAR4
22867 #undef VAR5
22868 #undef VAR6
22869 #undef VAR7
22870 #undef VAR8
22871 #undef VAR9
22872 #undef VAR10
22874 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
22876 static void
22877 arm_init_neon_builtins (void)
22879 unsigned int i, fcode;
22880 tree decl;
22882 tree neon_intQI_type_node;
22883 tree neon_intHI_type_node;
22884 tree neon_floatHF_type_node;
22885 tree neon_polyQI_type_node;
22886 tree neon_polyHI_type_node;
22887 tree neon_intSI_type_node;
22888 tree neon_intDI_type_node;
22889 tree neon_float_type_node;
22891 tree intQI_pointer_node;
22892 tree intHI_pointer_node;
22893 tree intSI_pointer_node;
22894 tree intDI_pointer_node;
22895 tree float_pointer_node;
22897 tree const_intQI_node;
22898 tree const_intHI_node;
22899 tree const_intSI_node;
22900 tree const_intDI_node;
22901 tree const_float_node;
22903 tree const_intQI_pointer_node;
22904 tree const_intHI_pointer_node;
22905 tree const_intSI_pointer_node;
22906 tree const_intDI_pointer_node;
22907 tree const_float_pointer_node;
22909 tree V8QI_type_node;
22910 tree V4HI_type_node;
22911 tree V4HF_type_node;
22912 tree V2SI_type_node;
22913 tree V2SF_type_node;
22914 tree V16QI_type_node;
22915 tree V8HI_type_node;
22916 tree V4SI_type_node;
22917 tree V4SF_type_node;
22918 tree V2DI_type_node;
22920 tree intUQI_type_node;
22921 tree intUHI_type_node;
22922 tree intUSI_type_node;
22923 tree intUDI_type_node;
22925 tree intEI_type_node;
22926 tree intOI_type_node;
22927 tree intCI_type_node;
22928 tree intXI_type_node;
22930 tree V8QI_pointer_node;
22931 tree V4HI_pointer_node;
22932 tree V2SI_pointer_node;
22933 tree V2SF_pointer_node;
22934 tree V16QI_pointer_node;
22935 tree V8HI_pointer_node;
22936 tree V4SI_pointer_node;
22937 tree V4SF_pointer_node;
22938 tree V2DI_pointer_node;
22940 tree void_ftype_pv8qi_v8qi_v8qi;
22941 tree void_ftype_pv4hi_v4hi_v4hi;
22942 tree void_ftype_pv2si_v2si_v2si;
22943 tree void_ftype_pv2sf_v2sf_v2sf;
22944 tree void_ftype_pdi_di_di;
22945 tree void_ftype_pv16qi_v16qi_v16qi;
22946 tree void_ftype_pv8hi_v8hi_v8hi;
22947 tree void_ftype_pv4si_v4si_v4si;
22948 tree void_ftype_pv4sf_v4sf_v4sf;
22949 tree void_ftype_pv2di_v2di_v2di;
22951 tree reinterp_ftype_dreg[5][5];
22952 tree reinterp_ftype_qreg[5][5];
22953 tree dreg_types[5], qreg_types[5];
22955 /* Create distinguished type nodes for NEON vector element types,
22956 and pointers to values of such types, so we can detect them later. */
22957 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
22958 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
22959 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
22960 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
22961 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
22962 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
22963 neon_float_type_node = make_node (REAL_TYPE);
22964 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
22965 layout_type (neon_float_type_node);
22966 neon_floatHF_type_node = make_node (REAL_TYPE);
22967 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
22968 layout_type (neon_floatHF_type_node);
22970 /* Define typedefs which exactly correspond to the modes we are basing vector
22971 types on. If you change these names you'll need to change
22972 the table used by arm_mangle_type too. */
22973 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
22974 "__builtin_neon_qi");
22975 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
22976 "__builtin_neon_hi");
22977 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
22978 "__builtin_neon_hf");
22979 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
22980 "__builtin_neon_si");
22981 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
22982 "__builtin_neon_sf");
22983 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
22984 "__builtin_neon_di");
22985 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
22986 "__builtin_neon_poly8");
22987 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
22988 "__builtin_neon_poly16");
22990 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
22991 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
22992 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
22993 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
22994 float_pointer_node = build_pointer_type (neon_float_type_node);
22996 /* Next create constant-qualified versions of the above types. */
22997 const_intQI_node = build_qualified_type (neon_intQI_type_node,
22998 TYPE_QUAL_CONST);
22999 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23000 TYPE_QUAL_CONST);
23001 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23002 TYPE_QUAL_CONST);
23003 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23004 TYPE_QUAL_CONST);
23005 const_float_node = build_qualified_type (neon_float_type_node,
23006 TYPE_QUAL_CONST);
23008 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23009 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23010 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23011 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23012 const_float_pointer_node = build_pointer_type (const_float_node);
23014 /* Now create vector types based on our NEON element types. */
23015 /* 64-bit vectors. */
23016 V8QI_type_node =
23017 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23018 V4HI_type_node =
23019 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23020 V4HF_type_node =
23021 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23022 V2SI_type_node =
23023 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23024 V2SF_type_node =
23025 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23026 /* 128-bit vectors. */
23027 V16QI_type_node =
23028 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23029 V8HI_type_node =
23030 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23031 V4SI_type_node =
23032 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23033 V4SF_type_node =
23034 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23035 V2DI_type_node =
23036 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23038 /* Unsigned integer types for various mode sizes. */
23039 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23040 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23041 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23042 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23044 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23045 "__builtin_neon_uqi");
23046 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23047 "__builtin_neon_uhi");
23048 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23049 "__builtin_neon_usi");
23050 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23051 "__builtin_neon_udi");
23053 /* Opaque integer types for structures of vectors. */
23054 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23055 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23056 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23057 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23059 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23060 "__builtin_neon_ti");
23061 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23062 "__builtin_neon_ei");
23063 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23064 "__builtin_neon_oi");
23065 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23066 "__builtin_neon_ci");
23067 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23068 "__builtin_neon_xi");
23070 /* Pointers to vector types. */
23071 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23072 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23073 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23074 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23075 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23076 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23077 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23078 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23079 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23081 /* Operations which return results as pairs. */
23082 void_ftype_pv8qi_v8qi_v8qi =
23083 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23084 V8QI_type_node, NULL);
23085 void_ftype_pv4hi_v4hi_v4hi =
23086 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23087 V4HI_type_node, NULL);
23088 void_ftype_pv2si_v2si_v2si =
23089 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23090 V2SI_type_node, NULL);
23091 void_ftype_pv2sf_v2sf_v2sf =
23092 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23093 V2SF_type_node, NULL);
23094 void_ftype_pdi_di_di =
23095 build_function_type_list (void_type_node, intDI_pointer_node,
23096 neon_intDI_type_node, neon_intDI_type_node, NULL);
23097 void_ftype_pv16qi_v16qi_v16qi =
23098 build_function_type_list (void_type_node, V16QI_pointer_node,
23099 V16QI_type_node, V16QI_type_node, NULL);
23100 void_ftype_pv8hi_v8hi_v8hi =
23101 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23102 V8HI_type_node, NULL);
23103 void_ftype_pv4si_v4si_v4si =
23104 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23105 V4SI_type_node, NULL);
23106 void_ftype_pv4sf_v4sf_v4sf =
23107 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23108 V4SF_type_node, NULL);
23109 void_ftype_pv2di_v2di_v2di =
23110 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23111 V2DI_type_node, NULL);
23113 dreg_types[0] = V8QI_type_node;
23114 dreg_types[1] = V4HI_type_node;
23115 dreg_types[2] = V2SI_type_node;
23116 dreg_types[3] = V2SF_type_node;
23117 dreg_types[4] = neon_intDI_type_node;
23119 qreg_types[0] = V16QI_type_node;
23120 qreg_types[1] = V8HI_type_node;
23121 qreg_types[2] = V4SI_type_node;
23122 qreg_types[3] = V4SF_type_node;
23123 qreg_types[4] = V2DI_type_node;
23125 for (i = 0; i < 5; i++)
23127 int j;
23128 for (j = 0; j < 5; j++)
23130 reinterp_ftype_dreg[i][j]
23131 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23132 reinterp_ftype_qreg[i][j]
23133 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23137 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23138 i < ARRAY_SIZE (neon_builtin_data);
23139 i++, fcode++)
23141 neon_builtin_datum *d = &neon_builtin_data[i];
23143 const char* const modenames[] = {
23144 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23145 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23146 "ti", "ei", "oi"
23148 char namebuf[60];
23149 tree ftype = NULL;
23150 int is_load = 0, is_store = 0;
23152 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23154 d->fcode = fcode;
23156 switch (d->itype)
23158 case NEON_LOAD1:
23159 case NEON_LOAD1LANE:
23160 case NEON_LOADSTRUCT:
23161 case NEON_LOADSTRUCTLANE:
23162 is_load = 1;
23163 /* Fall through. */
23164 case NEON_STORE1:
23165 case NEON_STORE1LANE:
23166 case NEON_STORESTRUCT:
23167 case NEON_STORESTRUCTLANE:
23168 if (!is_load)
23169 is_store = 1;
23170 /* Fall through. */
23171 case NEON_UNOP:
23172 case NEON_RINT:
23173 case NEON_BINOP:
23174 case NEON_LOGICBINOP:
23175 case NEON_SHIFTINSERT:
23176 case NEON_TERNOP:
23177 case NEON_GETLANE:
23178 case NEON_SETLANE:
23179 case NEON_CREATE:
23180 case NEON_DUP:
23181 case NEON_DUPLANE:
23182 case NEON_SHIFTIMM:
23183 case NEON_SHIFTACC:
23184 case NEON_COMBINE:
23185 case NEON_SPLIT:
23186 case NEON_CONVERT:
23187 case NEON_FIXCONV:
23188 case NEON_LANEMUL:
23189 case NEON_LANEMULL:
23190 case NEON_LANEMULH:
23191 case NEON_LANEMAC:
23192 case NEON_SCALARMUL:
23193 case NEON_SCALARMULL:
23194 case NEON_SCALARMULH:
23195 case NEON_SCALARMAC:
23196 case NEON_SELECT:
23197 case NEON_VTBL:
23198 case NEON_VTBX:
23200 int k;
23201 tree return_type = void_type_node, args = void_list_node;
23203 /* Build a function type directly from the insn_data for
23204 this builtin. The build_function_type() function takes
23205 care of removing duplicates for us. */
23206 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23208 tree eltype;
23210 if (is_load && k == 1)
23212 /* Neon load patterns always have the memory
23213 operand in the operand 1 position. */
23214 gcc_assert (insn_data[d->code].operand[k].predicate
23215 == neon_struct_operand);
23217 switch (d->mode)
23219 case T_V8QI:
23220 case T_V16QI:
23221 eltype = const_intQI_pointer_node;
23222 break;
23224 case T_V4HI:
23225 case T_V8HI:
23226 eltype = const_intHI_pointer_node;
23227 break;
23229 case T_V2SI:
23230 case T_V4SI:
23231 eltype = const_intSI_pointer_node;
23232 break;
23234 case T_V2SF:
23235 case T_V4SF:
23236 eltype = const_float_pointer_node;
23237 break;
23239 case T_DI:
23240 case T_V2DI:
23241 eltype = const_intDI_pointer_node;
23242 break;
23244 default: gcc_unreachable ();
23247 else if (is_store && k == 0)
23249 /* Similarly, Neon store patterns use operand 0 as
23250 the memory location to store to. */
23251 gcc_assert (insn_data[d->code].operand[k].predicate
23252 == neon_struct_operand);
23254 switch (d->mode)
23256 case T_V8QI:
23257 case T_V16QI:
23258 eltype = intQI_pointer_node;
23259 break;
23261 case T_V4HI:
23262 case T_V8HI:
23263 eltype = intHI_pointer_node;
23264 break;
23266 case T_V2SI:
23267 case T_V4SI:
23268 eltype = intSI_pointer_node;
23269 break;
23271 case T_V2SF:
23272 case T_V4SF:
23273 eltype = float_pointer_node;
23274 break;
23276 case T_DI:
23277 case T_V2DI:
23278 eltype = intDI_pointer_node;
23279 break;
23281 default: gcc_unreachable ();
23284 else
23286 switch (insn_data[d->code].operand[k].mode)
23288 case VOIDmode: eltype = void_type_node; break;
23289 /* Scalars. */
23290 case QImode: eltype = neon_intQI_type_node; break;
23291 case HImode: eltype = neon_intHI_type_node; break;
23292 case SImode: eltype = neon_intSI_type_node; break;
23293 case SFmode: eltype = neon_float_type_node; break;
23294 case DImode: eltype = neon_intDI_type_node; break;
23295 case TImode: eltype = intTI_type_node; break;
23296 case EImode: eltype = intEI_type_node; break;
23297 case OImode: eltype = intOI_type_node; break;
23298 case CImode: eltype = intCI_type_node; break;
23299 case XImode: eltype = intXI_type_node; break;
23300 /* 64-bit vectors. */
23301 case V8QImode: eltype = V8QI_type_node; break;
23302 case V4HImode: eltype = V4HI_type_node; break;
23303 case V2SImode: eltype = V2SI_type_node; break;
23304 case V2SFmode: eltype = V2SF_type_node; break;
23305 /* 128-bit vectors. */
23306 case V16QImode: eltype = V16QI_type_node; break;
23307 case V8HImode: eltype = V8HI_type_node; break;
23308 case V4SImode: eltype = V4SI_type_node; break;
23309 case V4SFmode: eltype = V4SF_type_node; break;
23310 case V2DImode: eltype = V2DI_type_node; break;
23311 default: gcc_unreachable ();
23315 if (k == 0 && !is_store)
23316 return_type = eltype;
23317 else
23318 args = tree_cons (NULL_TREE, eltype, args);
23321 ftype = build_function_type (return_type, args);
23323 break;
23325 case NEON_RESULTPAIR:
23327 switch (insn_data[d->code].operand[1].mode)
23329 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23330 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23331 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23332 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23333 case DImode: ftype = void_ftype_pdi_di_di; break;
23334 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23335 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23336 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23337 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23338 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23339 default: gcc_unreachable ();
23342 break;
23344 case NEON_REINTERP:
23346 /* We iterate over 5 doubleword types, then 5 quadword
23347 types. V4HF is not a type used in reinterpret, so we translate
23348 d->mode to the correct index in reinterp_ftype_dreg. */
23349 int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
23350 switch (insn_data[d->code].operand[0].mode)
23352 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23353 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23354 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23355 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23356 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23357 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23358 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23359 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23360 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23361 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23362 default: gcc_unreachable ();
23365 break;
23366 case NEON_FLOAT_WIDEN:
23368 tree eltype = NULL_TREE;
23369 tree return_type = NULL_TREE;
23371 switch (insn_data[d->code].operand[1].mode)
23373 case V4HFmode:
23374 eltype = V4HF_type_node;
23375 return_type = V4SF_type_node;
23376 break;
23377 default: gcc_unreachable ();
23379 ftype = build_function_type_list (return_type, eltype, NULL);
23380 break;
23382 case NEON_FLOAT_NARROW:
23384 tree eltype = NULL_TREE;
23385 tree return_type = NULL_TREE;
23387 switch (insn_data[d->code].operand[1].mode)
23389 case V4SFmode:
23390 eltype = V4SF_type_node;
23391 return_type = V4HF_type_node;
23392 break;
23393 default: gcc_unreachable ();
23395 ftype = build_function_type_list (return_type, eltype, NULL);
23396 break;
23398 default:
23399 gcc_unreachable ();
23402 gcc_assert (ftype != NULL);
23404 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23406 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23407 NULL_TREE);
23408 arm_builtin_decls[fcode] = decl;
23412 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23413 do \
23415 if ((MASK) & insn_flags) \
23417 tree bdecl; \
23418 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23419 BUILT_IN_MD, NULL, NULL_TREE); \
23420 arm_builtin_decls[CODE] = bdecl; \
23423 while (0)
23425 struct builtin_description
23427 const unsigned int mask;
23428 const enum insn_code icode;
23429 const char * const name;
23430 const enum arm_builtins code;
23431 const enum rtx_code comparison;
23432 const unsigned int flag;
23435 static const struct builtin_description bdesc_2arg[] =
23437 #define IWMMXT_BUILTIN(code, string, builtin) \
23438 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23439 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23441 #define IWMMXT2_BUILTIN(code, string, builtin) \
23442 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23443 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23445 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23446 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23447 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23448 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23449 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23450 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23451 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23452 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23453 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23454 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23455 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23456 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23457 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23458 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23459 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23460 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23461 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23462 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23463 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23464 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23465 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23466 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23467 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23468 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23469 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23470 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23471 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23472 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23473 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23474 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23475 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23476 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23477 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23478 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23479 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23480 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23481 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23482 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23483 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23484 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23485 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23486 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23487 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23488 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23489 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23490 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23491 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23492 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23493 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23494 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23495 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23496 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23497 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23498 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23499 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23500 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23501 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23502 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23503 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23504 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23505 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23506 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23507 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23508 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23509 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23510 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23511 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23512 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23513 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23514 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23515 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23516 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23517 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23518 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23519 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23520 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23521 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23522 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23524 #define IWMMXT_BUILTIN2(code, builtin) \
23525 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23527 #define IWMMXT2_BUILTIN2(code, builtin) \
23528 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23530 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23531 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23532 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23533 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23534 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23535 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23536 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23537 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23538 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23539 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23542 static const struct builtin_description bdesc_1arg[] =
23544 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
23545 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
23546 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
23547 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
23548 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
23549 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
23550 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
23551 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
23552 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
23553 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
23554 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
23555 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
23556 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
23557 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
23558 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
23559 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
23560 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
23561 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
23562 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
23563 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
23564 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
23565 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
23566 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
23567 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
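/* The two tables above describe the iWMMXt builtins: bdesc_2arg lists the
   straightforward two-operand vector operations and bdesc_1arg the
   single-operand ones.  Entries created with IWMMXT_BUILTIN2 have a NULL
   name and are skipped by the registration loop in
   arm_init_iwmmxt_builtins below; the corresponding builtins (wpackhss,
   wmacuz, ...) are registered individually further down where more
   specific function types are needed.  */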
23570 /* Set up all the iWMMXt builtins. This is not called if
23571 TARGET_IWMMXT is zero. */
23573 static void
23574 arm_init_iwmmxt_builtins (void)
23576 const struct builtin_description * d;
23577 size_t i;
23579 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
23580 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
23581 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
23583 tree v8qi_ftype_v8qi_v8qi_int
23584 = build_function_type_list (V8QI_type_node,
23585 V8QI_type_node, V8QI_type_node,
23586 integer_type_node, NULL_TREE);
23587 tree v4hi_ftype_v4hi_int
23588 = build_function_type_list (V4HI_type_node,
23589 V4HI_type_node, integer_type_node, NULL_TREE);
23590 tree v2si_ftype_v2si_int
23591 = build_function_type_list (V2SI_type_node,
23592 V2SI_type_node, integer_type_node, NULL_TREE);
23593 tree v2si_ftype_di_di
23594 = build_function_type_list (V2SI_type_node,
23595 long_long_integer_type_node,
23596 long_long_integer_type_node,
23597 NULL_TREE);
23598 tree di_ftype_di_int
23599 = build_function_type_list (long_long_integer_type_node,
23600 long_long_integer_type_node,
23601 integer_type_node, NULL_TREE);
23602 tree di_ftype_di_int_int
23603 = build_function_type_list (long_long_integer_type_node,
23604 long_long_integer_type_node,
23605 integer_type_node,
23606 integer_type_node, NULL_TREE);
23607 tree int_ftype_v8qi
23608 = build_function_type_list (integer_type_node,
23609 V8QI_type_node, NULL_TREE);
23610 tree int_ftype_v4hi
23611 = build_function_type_list (integer_type_node,
23612 V4HI_type_node, NULL_TREE);
23613 tree int_ftype_v2si
23614 = build_function_type_list (integer_type_node,
23615 V2SI_type_node, NULL_TREE);
23616 tree int_ftype_v8qi_int
23617 = build_function_type_list (integer_type_node,
23618 V8QI_type_node, integer_type_node, NULL_TREE);
23619 tree int_ftype_v4hi_int
23620 = build_function_type_list (integer_type_node,
23621 V4HI_type_node, integer_type_node, NULL_TREE);
23622 tree int_ftype_v2si_int
23623 = build_function_type_list (integer_type_node,
23624 V2SI_type_node, integer_type_node, NULL_TREE);
23625 tree v8qi_ftype_v8qi_int_int
23626 = build_function_type_list (V8QI_type_node,
23627 V8QI_type_node, integer_type_node,
23628 integer_type_node, NULL_TREE);
23629 tree v4hi_ftype_v4hi_int_int
23630 = build_function_type_list (V4HI_type_node,
23631 V4HI_type_node, integer_type_node,
23632 integer_type_node, NULL_TREE);
23633 tree v2si_ftype_v2si_int_int
23634 = build_function_type_list (V2SI_type_node,
23635 V2SI_type_node, integer_type_node,
23636 integer_type_node, NULL_TREE);
23637 /* Miscellaneous. */
23638 tree v8qi_ftype_v4hi_v4hi
23639 = build_function_type_list (V8QI_type_node,
23640 V4HI_type_node, V4HI_type_node, NULL_TREE);
23641 tree v4hi_ftype_v2si_v2si
23642 = build_function_type_list (V4HI_type_node,
23643 V2SI_type_node, V2SI_type_node, NULL_TREE);
23644 tree v8qi_ftype_v4hi_v8qi
23645 = build_function_type_list (V8QI_type_node,
23646 V4HI_type_node, V8QI_type_node, NULL_TREE);
23647 tree v2si_ftype_v4hi_v4hi
23648 = build_function_type_list (V2SI_type_node,
23649 V4HI_type_node, V4HI_type_node, NULL_TREE);
23650 tree v2si_ftype_v8qi_v8qi
23651 = build_function_type_list (V2SI_type_node,
23652 V8QI_type_node, V8QI_type_node, NULL_TREE);
23653 tree v4hi_ftype_v4hi_di
23654 = build_function_type_list (V4HI_type_node,
23655 V4HI_type_node, long_long_integer_type_node,
23656 NULL_TREE);
23657 tree v2si_ftype_v2si_di
23658 = build_function_type_list (V2SI_type_node,
23659 V2SI_type_node, long_long_integer_type_node,
23660 NULL_TREE);
23661 tree di_ftype_void
23662 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
23663 tree int_ftype_void
23664 = build_function_type_list (integer_type_node, NULL_TREE);
23665 tree di_ftype_v8qi
23666 = build_function_type_list (long_long_integer_type_node,
23667 V8QI_type_node, NULL_TREE);
23668 tree di_ftype_v4hi
23669 = build_function_type_list (long_long_integer_type_node,
23670 V4HI_type_node, NULL_TREE);
23671 tree di_ftype_v2si
23672 = build_function_type_list (long_long_integer_type_node,
23673 V2SI_type_node, NULL_TREE);
23674 tree v2si_ftype_v4hi
23675 = build_function_type_list (V2SI_type_node,
23676 V4HI_type_node, NULL_TREE);
23677 tree v4hi_ftype_v8qi
23678 = build_function_type_list (V4HI_type_node,
23679 V8QI_type_node, NULL_TREE);
23680 tree v8qi_ftype_v8qi
23681 = build_function_type_list (V8QI_type_node,
23682 V8QI_type_node, NULL_TREE);
23683 tree v4hi_ftype_v4hi
23684 = build_function_type_list (V4HI_type_node,
23685 V4HI_type_node, NULL_TREE);
23686 tree v2si_ftype_v2si
23687 = build_function_type_list (V2SI_type_node,
23688 V2SI_type_node, NULL_TREE);
23690 tree di_ftype_di_v4hi_v4hi
23691 = build_function_type_list (long_long_unsigned_type_node,
23692 long_long_unsigned_type_node,
23693 V4HI_type_node, V4HI_type_node,
23694 NULL_TREE);
23696 tree di_ftype_v4hi_v4hi
23697 = build_function_type_list (long_long_unsigned_type_node,
23698 V4HI_type_node, V4HI_type_node,
23699 NULL_TREE);
23701 tree v2si_ftype_v2si_v4hi_v4hi
23702 = build_function_type_list (V2SI_type_node,
23703 V2SI_type_node, V4HI_type_node,
23704 V4HI_type_node, NULL_TREE);
23706 tree v2si_ftype_v2si_v8qi_v8qi
23707 = build_function_type_list (V2SI_type_node,
23708 V2SI_type_node, V8QI_type_node,
23709 V8QI_type_node, NULL_TREE);
23711 tree di_ftype_di_v2si_v2si
23712 = build_function_type_list (long_long_unsigned_type_node,
23713 long_long_unsigned_type_node,
23714 V2SI_type_node, V2SI_type_node,
23715 NULL_TREE);
23717 tree di_ftype_di_di_int
23718 = build_function_type_list (long_long_unsigned_type_node,
23719 long_long_unsigned_type_node,
23720 long_long_unsigned_type_node,
23721 integer_type_node, NULL_TREE);
23723 tree void_ftype_int
23724 = build_function_type_list (void_type_node,
23725 integer_type_node, NULL_TREE);
23727 tree v8qi_ftype_char
23728 = build_function_type_list (V8QI_type_node,
23729 signed_char_type_node, NULL_TREE);
23731 tree v4hi_ftype_short
23732 = build_function_type_list (V4HI_type_node,
23733 short_integer_type_node, NULL_TREE);
23735 tree v2si_ftype_int
23736 = build_function_type_list (V2SI_type_node,
23737 integer_type_node, NULL_TREE);
23739 /* Normal vector binops. */
23740 tree v8qi_ftype_v8qi_v8qi
23741 = build_function_type_list (V8QI_type_node,
23742 V8QI_type_node, V8QI_type_node, NULL_TREE);
23743 tree v4hi_ftype_v4hi_v4hi
23744 = build_function_type_list (V4HI_type_node,
23745 V4HI_type_node, V4HI_type_node, NULL_TREE);
23746 tree v2si_ftype_v2si_v2si
23747 = build_function_type_list (V2SI_type_node,
23748 V2SI_type_node, V2SI_type_node, NULL_TREE);
23749 tree di_ftype_di_di
23750 = build_function_type_list (long_long_unsigned_type_node,
23751 long_long_unsigned_type_node,
23752 long_long_unsigned_type_node,
23753 NULL_TREE);
23755 /* Add all builtins that are more or less simple operations on two
23756 operands. */
23757 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
23759 /* Use one of the operands; the target can have a different mode for
23760 mask-generating compares. */
23761 enum machine_mode mode;
23762 tree type;
23764 if (d->name == 0)
23765 continue;
23767 mode = insn_data[d->icode].operand[1].mode;
23769 switch (mode)
23771 case V8QImode:
23772 type = v8qi_ftype_v8qi_v8qi;
23773 break;
23774 case V4HImode:
23775 type = v4hi_ftype_v4hi_v4hi;
23776 break;
23777 case V2SImode:
23778 type = v2si_ftype_v2si_v2si;
23779 break;
23780 case DImode:
23781 type = di_ftype_di_di;
23782 break;
23784 default:
23785 gcc_unreachable ();
23788 def_mbuiltin (d->mask, d->name, type, d->code);
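/* For illustration only: the addv8qi3 entry above registers
   __builtin_arm_waddb with type v8qi_ftype_v8qi_v8qi, so code built with
   iWMMXt enabled (e.g. -mcpu=iwmmxt) could use it roughly as

     typedef signed char v8qi __attribute__ ((vector_size (8)));
     v8qi add_bytes (v8qi a, v8qi b) { return __builtin_arm_waddb (a, b); }

   The other bdesc_2arg entries follow the same pattern for their modes.  */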
23791 /* Add the remaining MMX insns with somewhat more complicated types. */
23792 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
23793 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
23794 ARM_BUILTIN_ ## CODE)
23796 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
23797 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
23798 ARM_BUILTIN_ ## CODE)
23800 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
23801 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
23802 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
23803 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
23804 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
23805 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
23806 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
23807 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
23808 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
23810 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
23811 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
23812 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
23813 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
23814 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
23815 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
23817 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
23818 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
23819 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
23820 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
23821 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
23822 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
23824 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
23825 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
23826 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
23827 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
23828 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
23829 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
23831 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
23832 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
23833 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
23834 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
23835 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
23836 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
23838 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
23840 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
23841 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
23842 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
23843 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
23844 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
23845 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
23846 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
23847 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
23848 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
23849 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
23851 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
23852 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
23853 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
23854 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
23855 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
23856 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
23857 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
23858 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
23859 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
23861 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
23862 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
23863 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
23865 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
23866 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
23867 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
23869 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
23870 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
23872 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
23873 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
23874 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
23875 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
23876 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
23877 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
23879 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
23880 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
23881 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
23882 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
23883 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
23884 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
23885 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
23886 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
23887 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
23888 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
23889 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
23890 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
23892 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
23893 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
23894 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
23895 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
23897 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
23898 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
23899 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
23900 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
23901 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
23902 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
23903 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
23905 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
23906 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
23907 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
23909 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
23910 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
23911 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
23912 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
23914 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
23915 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
23916 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
23917 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
23919 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
23920 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
23921 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
23922 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
23924 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
23925 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
23926 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
23927 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
23929 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
23930 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
23931 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
23932 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
23934 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
23935 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
23936 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
23937 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
23939 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
23941 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
23942 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
23943 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
23945 #undef iwmmx_mbuiltin
23946 #undef iwmmx2_mbuiltin
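/* Note: the _mm_* intrinsic names mentioned in the diagnostics emitted by
   arm_expand_builtin further down (e.g. _mm_rori_pi16) are the user-visible
   wrappers around these __builtin_arm_* functions, provided by the target's
   mmintrin.h header.  */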
23949 static void
23950 arm_init_fp16_builtins (void)
23952 tree fp16_type = make_node (REAL_TYPE);
23953 TYPE_PRECISION (fp16_type) = 16;
23954 layout_type (fp16_type);
23955 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
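/* Once registered, __fp16 is available as a 16-bit scalar floating type.
   It is only created when arm_fp16_format is set (see arm_init_builtins
   below), i.e. when a half-precision format such as -mfp16-format=ieee has
   been selected.  */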
23958 static void
23959 arm_init_builtins (void)
23961 if (TARGET_REALLY_IWMMXT)
23962 arm_init_iwmmxt_builtins ();
23964 if (TARGET_NEON)
23965 arm_init_neon_builtins ();
23967 if (arm_fp16_format)
23968 arm_init_fp16_builtins ();
23971 /* Return the ARM builtin for CODE. */
23973 static tree
23974 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23976 if (code >= ARM_BUILTIN_MAX)
23977 return error_mark_node;
23979 return arm_builtin_decls[code];
23982 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23984 static const char *
23985 arm_invalid_parameter_type (const_tree t)
23987 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23988 return N_("function parameters cannot have __fp16 type");
23989 return NULL;
23992 /* Implement TARGET_INVALID_RETURN_TYPE. */
23994 static const char *
23995 arm_invalid_return_type (const_tree t)
23997 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23998 return N_("functions cannot return __fp16 type");
23999 return NULL;
24002 /* Implement TARGET_PROMOTED_TYPE. */
24004 static tree
24005 arm_promoted_type (const_tree t)
24007 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24008 return float_type_node;
24009 return NULL_TREE;
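/* As a consequence, arithmetic on __fp16 values is carried out in float:
   in an expression such as a + b with __fp16 operands, both operands are
   promoted to float, and the result is narrowed back to __fp16 only when
   it is stored into an __fp16 object.  */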
24012 /* Implement TARGET_CONVERT_TO_TYPE.
24013 Specifically, this hook implements the peculiarity of the ARM
24014 half-precision floating-point C semantics that requires conversions between
24015 __fp16 to or from double to do an intermediate conversion to float. */
24017 static tree
24018 arm_convert_to_type (tree type, tree expr)
24020 tree fromtype = TREE_TYPE (expr);
24021 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24022 return NULL_TREE;
24023 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24024 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24025 return convert (type, convert (float_type_node, expr));
24026 return NULL_TREE;
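/* For example, converting an __fp16 value to double is emitted as
   __fp16 -> float -> double, and a double is narrowed as
   double -> float -> __fp16; float <-> __fp16 conversions need no
   intermediate step and fall through to the default handling.  */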
24029 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24030 This simply adds HFmode as a supported mode; even though we don't
24031 implement arithmetic on this type directly, it's supported by
24032 optabs conversions, much the way the double-word arithmetic is
24033 special-cased in the default hook. */
24035 static bool
24036 arm_scalar_mode_supported_p (enum machine_mode mode)
24038 if (mode == HFmode)
24039 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24040 else if (ALL_FIXED_POINT_MODE_P (mode))
24041 return true;
24042 else
24043 return default_scalar_mode_supported_p (mode);
24046 /* Errors in the source file can cause expand_expr to return const0_rtx
24047 where we expect a vector. To avoid crashing, use one of the vector
24048 clear instructions. */
24050 static rtx
24051 safe_vector_operand (rtx x, enum machine_mode mode)
24053 if (x != const0_rtx)
24054 return x;
24055 x = gen_reg_rtx (mode);
24057 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24058 : gen_rtx_SUBREG (DImode, x, 0)));
24059 return x;
24062 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24064 static rtx
24065 arm_expand_binop_builtin (enum insn_code icode,
24066 tree exp, rtx target)
24068 rtx pat;
24069 tree arg0 = CALL_EXPR_ARG (exp, 0);
24070 tree arg1 = CALL_EXPR_ARG (exp, 1);
24071 rtx op0 = expand_normal (arg0);
24072 rtx op1 = expand_normal (arg1);
24073 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24074 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24075 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24077 if (VECTOR_MODE_P (mode0))
24078 op0 = safe_vector_operand (op0, mode0);
24079 if (VECTOR_MODE_P (mode1))
24080 op1 = safe_vector_operand (op1, mode1);
24082 if (! target
24083 || GET_MODE (target) != tmode
24084 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24085 target = gen_reg_rtx (tmode);
24087 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24088 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24090 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24091 op0 = copy_to_mode_reg (mode0, op0);
24092 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24093 op1 = copy_to_mode_reg (mode1, op1);
24095 pat = GEN_FCN (icode) (target, op0, op1);
24096 if (! pat)
24097 return 0;
24098 emit_insn (pat);
24099 return target;
24102 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24104 static rtx
24105 arm_expand_unop_builtin (enum insn_code icode,
24106 tree exp, rtx target, int do_load)
24108 rtx pat;
24109 tree arg0 = CALL_EXPR_ARG (exp, 0);
24110 rtx op0 = expand_normal (arg0);
24111 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24112 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24114 if (! target
24115 || GET_MODE (target) != tmode
24116 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24117 target = gen_reg_rtx (tmode);
24118 if (do_load)
24119 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24120 else
24122 if (VECTOR_MODE_P (mode0))
24123 op0 = safe_vector_operand (op0, mode0);
24125 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24126 op0 = copy_to_mode_reg (mode0, op0);
24129 pat = GEN_FCN (icode) (target, op0);
24130 if (! pat)
24131 return 0;
24132 emit_insn (pat);
24133 return target;
24136 typedef enum {
24137 NEON_ARG_COPY_TO_REG,
24138 NEON_ARG_CONSTANT,
24139 NEON_ARG_MEMORY,
24140 NEON_ARG_STOP
24141 } builtin_arg;
24143 #define NEON_MAX_BUILTIN_ARGS 5
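/* Each expander call in arm_expand_neon_builtin passes one builtin_arg
   code per operand, terminated by NEON_ARG_STOP; arm_expand_neon_args
   walks that list, loading, copying or checking each operand as requested
   before emitting the instruction pattern.  */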
24145 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24146 and return an expression for the accessed memory.
24148 The intrinsic function operates on a block of registers that has
24149 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24150 function references the memory at EXP of type TYPE and in mode
24151 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24152 available. */
24154 static tree
24155 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24156 enum machine_mode reg_mode,
24157 neon_builtin_type_mode type_mode)
24159 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24160 tree elem_type, upper_bound, array_type;
24162 /* Work out the size of the register block in bytes. */
24163 reg_size = GET_MODE_SIZE (reg_mode);
24165 /* Work out the size of each vector in bytes. */
24166 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24167 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24169 /* Work out how many vectors there are. */
24170 gcc_assert (reg_size % vector_size == 0);
24171 nvectors = reg_size / vector_size;
24173 /* Work out the type of each element. */
24174 gcc_assert (POINTER_TYPE_P (type));
24175 elem_type = TREE_TYPE (type);
24177 /* Work out how many elements are being loaded or stored.
24178 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24179 and memory elements; anything else implies a lane load or store. */
24180 if (mem_mode == reg_mode)
24181 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24182 else
24183 nelems = nvectors;
24185 /* Create a type that describes the full access. */
24186 upper_bound = build_int_cst (size_type_node, nelems - 1);
24187 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24189 /* Dereference EXP using that type. */
24190 return fold_build2 (MEM_REF, array_type, exp,
24191 build_int_cst (build_pointer_type (array_type), 0));
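/* For example, for a full-width load of a 16-byte vector of 32-bit
   elements with MEM_MODE == REG_MODE, reg_size and vector_size are both
   16, so NVECTORS is 1 and NELEMS is 4, and the access is described as a
   four-element array at EXP.  For a lane access (MEM_MODE != REG_MODE)
   only one element per vector is touched, so NELEMS is just NVECTORS.  */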
24194 /* Expand a Neon builtin. */
24195 static rtx
24196 arm_expand_neon_args (rtx target, int icode, int have_retval,
24197 neon_builtin_type_mode type_mode,
24198 tree exp, int fcode, ...)
24200 va_list ap;
24201 rtx pat;
24202 tree arg[NEON_MAX_BUILTIN_ARGS];
24203 rtx op[NEON_MAX_BUILTIN_ARGS];
24204 tree arg_type;
24205 tree formals;
24206 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24207 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24208 enum machine_mode other_mode;
24209 int argc = 0;
24210 int opno;
24212 if (have_retval
24213 && (!target
24214 || GET_MODE (target) != tmode
24215 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24216 target = gen_reg_rtx (tmode);
24218 va_start (ap, fcode);
24220 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24222 for (;;)
24224 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24226 if (thisarg == NEON_ARG_STOP)
24227 break;
24228 else
24230 opno = argc + have_retval;
24231 mode[argc] = insn_data[icode].operand[opno].mode;
24232 arg[argc] = CALL_EXPR_ARG (exp, argc);
24233 arg_type = TREE_VALUE (formals);
24234 if (thisarg == NEON_ARG_MEMORY)
24236 other_mode = insn_data[icode].operand[1 - opno].mode;
24237 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24238 mode[argc], other_mode,
24239 type_mode);
24242 op[argc] = expand_normal (arg[argc]);
24244 switch (thisarg)
24246 case NEON_ARG_COPY_TO_REG:
24247 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24248 if (!(*insn_data[icode].operand[opno].predicate)
24249 (op[argc], mode[argc]))
24250 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24251 break;
24253 case NEON_ARG_CONSTANT:
24254 /* FIXME: This error message is somewhat unhelpful. */
24255 if (!(*insn_data[icode].operand[opno].predicate)
24256 (op[argc], mode[argc]))
24257 error ("argument must be a constant");
24258 break;
24260 case NEON_ARG_MEMORY:
24261 gcc_assert (MEM_P (op[argc]));
24262 PUT_MODE (op[argc], mode[argc]);
24263 /* ??? arm_neon.h uses the same built-in functions for signed
24264 and unsigned accesses, casting where necessary. This isn't
24265 alias safe. */
24266 set_mem_alias_set (op[argc], 0);
24267 if (!(*insn_data[icode].operand[opno].predicate)
24268 (op[argc], mode[argc]))
24269 op[argc] = (replace_equiv_address
24270 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24271 break;
24273 case NEON_ARG_STOP:
24274 gcc_unreachable ();
24277 argc++;
24278 formals = TREE_CHAIN (formals);
24282 va_end (ap);
24284 if (have_retval)
24285 switch (argc)
24287 case 1:
24288 pat = GEN_FCN (icode) (target, op[0]);
24289 break;
24291 case 2:
24292 pat = GEN_FCN (icode) (target, op[0], op[1]);
24293 break;
24295 case 3:
24296 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24297 break;
24299 case 4:
24300 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24301 break;
24303 case 5:
24304 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24305 break;
24307 default:
24308 gcc_unreachable ();
24310 else
24311 switch (argc)
24313 case 1:
24314 pat = GEN_FCN (icode) (op[0]);
24315 break;
24317 case 2:
24318 pat = GEN_FCN (icode) (op[0], op[1]);
24319 break;
24321 case 3:
24322 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24323 break;
24325 case 4:
24326 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24327 break;
24329 case 5:
24330 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24331 break;
24333 default:
24334 gcc_unreachable ();
24337 if (!pat)
24338 return 0;
24340 emit_insn (pat);
24342 return target;
24345 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24346 constants defined per-instruction or per instruction-variant. Instead, the
24347 required info is looked up in the table neon_builtin_data. */
24348 static rtx
24349 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24351 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24352 neon_itype itype = d->itype;
24353 enum insn_code icode = d->code;
24354 neon_builtin_type_mode type_mode = d->mode;
24356 switch (itype)
24358 case NEON_UNOP:
24359 case NEON_CONVERT:
24360 case NEON_DUPLANE:
24361 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24362 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24364 case NEON_BINOP:
24365 case NEON_SETLANE:
24366 case NEON_SCALARMUL:
24367 case NEON_SCALARMULL:
24368 case NEON_SCALARMULH:
24369 case NEON_SHIFTINSERT:
24370 case NEON_LOGICBINOP:
24371 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24372 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24373 NEON_ARG_STOP);
24375 case NEON_TERNOP:
24376 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24377 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24378 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24380 case NEON_GETLANE:
24381 case NEON_FIXCONV:
24382 case NEON_SHIFTIMM:
24383 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24384 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24385 NEON_ARG_STOP);
24387 case NEON_CREATE:
24388 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24389 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24391 case NEON_DUP:
24392 case NEON_RINT:
24393 case NEON_SPLIT:
24394 case NEON_FLOAT_WIDEN:
24395 case NEON_FLOAT_NARROW:
24396 case NEON_REINTERP:
24397 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24398 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24400 case NEON_COMBINE:
24401 case NEON_VTBL:
24402 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24403 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24405 case NEON_RESULTPAIR:
24406 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24407 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24408 NEON_ARG_STOP);
24410 case NEON_LANEMUL:
24411 case NEON_LANEMULL:
24412 case NEON_LANEMULH:
24413 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24414 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24415 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24417 case NEON_LANEMAC:
24418 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24419 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24420 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24422 case NEON_SHIFTACC:
24423 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24424 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24425 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24427 case NEON_SCALARMAC:
24428 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24429 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24430 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24432 case NEON_SELECT:
24433 case NEON_VTBX:
24434 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24435 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24436 NEON_ARG_STOP);
24438 case NEON_LOAD1:
24439 case NEON_LOADSTRUCT:
24440 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24441 NEON_ARG_MEMORY, NEON_ARG_STOP);
24443 case NEON_LOAD1LANE:
24444 case NEON_LOADSTRUCTLANE:
24445 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24446 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24447 NEON_ARG_STOP);
24449 case NEON_STORE1:
24450 case NEON_STORESTRUCT:
24451 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24452 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24454 case NEON_STORE1LANE:
24455 case NEON_STORESTRUCTLANE:
24456 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24457 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24458 NEON_ARG_STOP);
24461 gcc_unreachable ();
24464 /* Emit code to reinterpret one Neon type as another, without altering bits. */
24465 void
24466 neon_reinterpret (rtx dest, rtx src)
24468 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
24471 /* Emit code to place a Neon pair result in memory locations (with equal
24472 registers). */
24473 void
24474 neon_emit_pair_result_insn (enum machine_mode mode,
24475 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
24476 rtx op1, rtx op2)
24478 rtx mem = gen_rtx_MEM (mode, destaddr);
24479 rtx tmp1 = gen_reg_rtx (mode);
24480 rtx tmp2 = gen_reg_rtx (mode);
24482 emit_insn (intfn (tmp1, op1, op2, tmp2));
24484 emit_move_insn (mem, tmp1);
24485 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
24486 emit_move_insn (mem, tmp2);
24489 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24490 not to early-clobber SRC registers in the process.
24492 We assume that the operands described by SRC and DEST represent a
24493 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24494 number of components into which the copy has been decomposed. */
24495 void
24496 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24498 unsigned int i;
24500 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24501 || REGNO (operands[0]) < REGNO (operands[1]))
24503 for (i = 0; i < count; i++)
24505 operands[2 * i] = dest[i];
24506 operands[2 * i + 1] = src[i];
24509 else
24511 for (i = 0; i < count; i++)
24513 operands[2 * i] = dest[count - i - 1];
24514 operands[2 * i + 1] = src[count - i - 1];
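/* For example, when copying the two-register block {d0,d1} into {d1,d2},
   the destination overlaps the source and has the higher register number,
   so the operands are laid out in reverse order (d2 <- d1 before d1 <- d0)
   and the caller's moves cannot clobber d1 before it has been read.  */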
24519 /* Split operands into moves from op[1] + op[2] into op[0]. */
24521 void
24522 neon_split_vcombine (rtx operands[3])
24524 unsigned int dest = REGNO (operands[0]);
24525 unsigned int src1 = REGNO (operands[1]);
24526 unsigned int src2 = REGNO (operands[2]);
24527 enum machine_mode halfmode = GET_MODE (operands[1]);
24528 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
24529 rtx destlo, desthi;
24531 if (src1 == dest && src2 == dest + halfregs)
24533 /* No-op move. Can't split to nothing; emit something. */
24534 emit_note (NOTE_INSN_DELETED);
24535 return;
24538 /* Preserve register attributes for variable tracking. */
24539 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24540 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24541 GET_MODE_SIZE (halfmode));
24543 /* Special case of reversed high/low parts. Use VSWP. */
24544 if (src2 == dest && src1 == dest + halfregs)
24546 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
24547 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
24548 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24549 return;
24552 if (!reg_overlap_mentioned_p (operands[2], destlo))
24554 /* Try to avoid unnecessary moves if part of the result
24555 is in the right place already. */
24556 if (src1 != dest)
24557 emit_move_insn (destlo, operands[1]);
24558 if (src2 != dest + halfregs)
24559 emit_move_insn (desthi, operands[2]);
24561 else
24563 if (src2 != dest + halfregs)
24564 emit_move_insn (desthi, operands[2]);
24565 if (src1 != dest)
24566 emit_move_insn (destlo, operands[1]);
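/* For example, combining d0 and d1 into q0 when both halves are already in
   place emits only a NOTE_INSN_DELETED; combining d1 and d0 into q0 becomes
   the single parallel set above (matching a VSWP pattern); in all other
   cases at most two ordinary moves are emitted, ordered so that no source
   half is overwritten before it is used.  */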
24570 /* Expand an expression EXP that calls a built-in function,
24571 with result going to TARGET if that's convenient
24572 (and in mode MODE if that's convenient).
24573 SUBTARGET may be used as the target for computing one of EXP's operands.
24574 IGNORE is nonzero if the value is to be ignored. */
24576 static rtx
24577 arm_expand_builtin (tree exp,
24578 rtx target,
24579 rtx subtarget ATTRIBUTE_UNUSED,
24580 enum machine_mode mode ATTRIBUTE_UNUSED,
24581 int ignore ATTRIBUTE_UNUSED)
24583 const struct builtin_description * d;
24584 enum insn_code icode;
24585 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24586 tree arg0;
24587 tree arg1;
24588 tree arg2;
24589 rtx op0;
24590 rtx op1;
24591 rtx op2;
24592 rtx pat;
24593 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24594 size_t i;
24595 enum machine_mode tmode;
24596 enum machine_mode mode0;
24597 enum machine_mode mode1;
24598 enum machine_mode mode2;
24599 int opint;
24600 int selector;
24601 int mask;
24602 int imm;
24604 if (fcode >= ARM_BUILTIN_NEON_BASE)
24605 return arm_expand_neon_builtin (fcode, exp, target);
24607 switch (fcode)
24609 case ARM_BUILTIN_TEXTRMSB:
24610 case ARM_BUILTIN_TEXTRMUB:
24611 case ARM_BUILTIN_TEXTRMSH:
24612 case ARM_BUILTIN_TEXTRMUH:
24613 case ARM_BUILTIN_TEXTRMSW:
24614 case ARM_BUILTIN_TEXTRMUW:
24615 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
24616 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
24617 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
24618 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
24619 : CODE_FOR_iwmmxt_textrmw);
24621 arg0 = CALL_EXPR_ARG (exp, 0);
24622 arg1 = CALL_EXPR_ARG (exp, 1);
24623 op0 = expand_normal (arg0);
24624 op1 = expand_normal (arg1);
24625 tmode = insn_data[icode].operand[0].mode;
24626 mode0 = insn_data[icode].operand[1].mode;
24627 mode1 = insn_data[icode].operand[2].mode;
24629 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24630 op0 = copy_to_mode_reg (mode0, op0);
24631 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24633 /* @@@ better error message */
24634 error ("selector must be an immediate");
24635 return gen_reg_rtx (tmode);
24638 opint = INTVAL (op1);
24639 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
24641 if (opint > 7 || opint < 0)
24642 error ("the selector must be in the range 0 to 7");
24644 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
24646 if (opint > 3 || opint < 0)
24647 error ("the selector must be in the range 0 to 3");
24649 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
24651 if (opint > 1 || opint < 0)
24652 error ("the selector must be in the range 0 to 1");
24655 if (target == 0
24656 || GET_MODE (target) != tmode
24657 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24658 target = gen_reg_rtx (tmode);
24659 pat = GEN_FCN (icode) (target, op0, op1);
24660 if (! pat)
24661 return 0;
24662 emit_insn (pat);
24663 return target;
24665 case ARM_BUILTIN_WALIGNI:
24666 /* If op2 is an immediate, use the waligni pattern, else use walignr. */
24667 arg0 = CALL_EXPR_ARG (exp, 0);
24668 arg1 = CALL_EXPR_ARG (exp, 1);
24669 arg2 = CALL_EXPR_ARG (exp, 2);
24670 op0 = expand_normal (arg0);
24671 op1 = expand_normal (arg1);
24672 op2 = expand_normal (arg2);
24673 if (CONST_INT_P (op2))
24675 icode = CODE_FOR_iwmmxt_waligni;
24676 tmode = insn_data[icode].operand[0].mode;
24677 mode0 = insn_data[icode].operand[1].mode;
24678 mode1 = insn_data[icode].operand[2].mode;
24679 mode2 = insn_data[icode].operand[3].mode;
24680 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24681 op0 = copy_to_mode_reg (mode0, op0);
24682 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24683 op1 = copy_to_mode_reg (mode1, op1);
24684 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
24685 selector = INTVAL (op2);
24686 if (selector > 7 || selector < 0)
24687 error ("the selector must be in the range 0 to 7");
24689 else
24691 icode = CODE_FOR_iwmmxt_walignr;
24692 tmode = insn_data[icode].operand[0].mode;
24693 mode0 = insn_data[icode].operand[1].mode;
24694 mode1 = insn_data[icode].operand[2].mode;
24695 mode2 = insn_data[icode].operand[3].mode;
24696 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24697 op0 = copy_to_mode_reg (mode0, op0);
24698 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24699 op1 = copy_to_mode_reg (mode1, op1);
24700 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
24701 op2 = copy_to_mode_reg (mode2, op2);
24703 if (target == 0
24704 || GET_MODE (target) != tmode
24705 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
24706 target = gen_reg_rtx (tmode);
24707 pat = GEN_FCN (icode) (target, op0, op1, op2);
24708 if (!pat)
24709 return 0;
24710 emit_insn (pat);
24711 return target;
24713 case ARM_BUILTIN_TINSRB:
24714 case ARM_BUILTIN_TINSRH:
24715 case ARM_BUILTIN_TINSRW:
24716 case ARM_BUILTIN_WMERGE:
24717 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
24718 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
24719 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
24720 : CODE_FOR_iwmmxt_tinsrw);
24721 arg0 = CALL_EXPR_ARG (exp, 0);
24722 arg1 = CALL_EXPR_ARG (exp, 1);
24723 arg2 = CALL_EXPR_ARG (exp, 2);
24724 op0 = expand_normal (arg0);
24725 op1 = expand_normal (arg1);
24726 op2 = expand_normal (arg2);
24727 tmode = insn_data[icode].operand[0].mode;
24728 mode0 = insn_data[icode].operand[1].mode;
24729 mode1 = insn_data[icode].operand[2].mode;
24730 mode2 = insn_data[icode].operand[3].mode;
24732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24733 op0 = copy_to_mode_reg (mode0, op0);
24734 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24735 op1 = copy_to_mode_reg (mode1, op1);
24736 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24738 error ("selector must be an immediate");
24739 return const0_rtx;
24741 if (icode == CODE_FOR_iwmmxt_wmerge)
24743 selector = INTVAL (op2);
24744 if (selector > 7 || selector < 0)
24745 error ("the selector must be in the range 0 to 7");
24747 if ((icode == CODE_FOR_iwmmxt_tinsrb)
24748 || (icode == CODE_FOR_iwmmxt_tinsrh)
24749 || (icode == CODE_FOR_iwmmxt_tinsrw))
24751 mask = 0x01;
24752 selector = INTVAL (op2);
24753 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
24754 error ("the selector must be in the range 0 to 7");
24755 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
24756 error ("the selector must be in the range 0 to 3");
24757 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
24758 error ("the selector must be in the range 0 to 1");
24759 mask <<= selector;
24760 op2 = GEN_INT (mask);
24762 if (target == 0
24763 || GET_MODE (target) != tmode
24764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24765 target = gen_reg_rtx (tmode);
24766 pat = GEN_FCN (icode) (target, op0, op1, op2);
24767 if (! pat)
24768 return 0;
24769 emit_insn (pat);
24770 return target;
24772 case ARM_BUILTIN_SETWCGR0:
24773 case ARM_BUILTIN_SETWCGR1:
24774 case ARM_BUILTIN_SETWCGR2:
24775 case ARM_BUILTIN_SETWCGR3:
24776 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
24777 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
24778 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
24779 : CODE_FOR_iwmmxt_setwcgr3);
24780 arg0 = CALL_EXPR_ARG (exp, 0);
24781 op0 = expand_normal (arg0);
24782 mode0 = insn_data[icode].operand[0].mode;
24783 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
24784 op0 = copy_to_mode_reg (mode0, op0);
24785 pat = GEN_FCN (icode) (op0);
24786 if (!pat)
24787 return 0;
24788 emit_insn (pat);
24789 return 0;
24791 case ARM_BUILTIN_GETWCGR0:
24792 case ARM_BUILTIN_GETWCGR1:
24793 case ARM_BUILTIN_GETWCGR2:
24794 case ARM_BUILTIN_GETWCGR3:
24795 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
24796 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
24797 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
24798 : CODE_FOR_iwmmxt_getwcgr3);
24799 tmode = insn_data[icode].operand[0].mode;
24800 if (target == 0
24801 || GET_MODE (target) != tmode
24802 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
24803 target = gen_reg_rtx (tmode);
24804 pat = GEN_FCN (icode) (target);
24805 if (!pat)
24806 return 0;
24807 emit_insn (pat);
24808 return target;
24810 case ARM_BUILTIN_WSHUFH:
24811 icode = CODE_FOR_iwmmxt_wshufh;
24812 arg0 = CALL_EXPR_ARG (exp, 0);
24813 arg1 = CALL_EXPR_ARG (exp, 1);
24814 op0 = expand_normal (arg0);
24815 op1 = expand_normal (arg1);
24816 tmode = insn_data[icode].operand[0].mode;
24817 mode1 = insn_data[icode].operand[1].mode;
24818 mode2 = insn_data[icode].operand[2].mode;
24820 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
24821 op0 = copy_to_mode_reg (mode1, op0);
24822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
24824 error ("mask must be an immediate");
24825 return const0_rtx;
24827 selector = INTVAL (op1);
24828 if (selector < 0 || selector > 255)
24829 error ("the mask must be in the range 0 to 255");
24830 if (target == 0
24831 || GET_MODE (target) != tmode
24832 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24833 target = gen_reg_rtx (tmode);
24834 pat = GEN_FCN (icode) (target, op0, op1);
24835 if (! pat)
24836 return 0;
24837 emit_insn (pat);
24838 return target;
24840 case ARM_BUILTIN_WMADDS:
24841 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
24842 case ARM_BUILTIN_WMADDSX:
24843 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
24844 case ARM_BUILTIN_WMADDSN:
24845 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
24846 case ARM_BUILTIN_WMADDU:
24847 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
24848 case ARM_BUILTIN_WMADDUX:
24849 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
24850 case ARM_BUILTIN_WMADDUN:
24851 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
24852 case ARM_BUILTIN_WSADBZ:
24853 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
24854 case ARM_BUILTIN_WSADHZ:
24855 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
24857 /* Several three-argument builtins. */
24858 case ARM_BUILTIN_WMACS:
24859 case ARM_BUILTIN_WMACU:
24860 case ARM_BUILTIN_TMIA:
24861 case ARM_BUILTIN_TMIAPH:
24862 case ARM_BUILTIN_TMIATT:
24863 case ARM_BUILTIN_TMIATB:
24864 case ARM_BUILTIN_TMIABT:
24865 case ARM_BUILTIN_TMIABB:
24866 case ARM_BUILTIN_WQMIABB:
24867 case ARM_BUILTIN_WQMIABT:
24868 case ARM_BUILTIN_WQMIATB:
24869 case ARM_BUILTIN_WQMIATT:
24870 case ARM_BUILTIN_WQMIABBN:
24871 case ARM_BUILTIN_WQMIABTN:
24872 case ARM_BUILTIN_WQMIATBN:
24873 case ARM_BUILTIN_WQMIATTN:
24874 case ARM_BUILTIN_WMIABB:
24875 case ARM_BUILTIN_WMIABT:
24876 case ARM_BUILTIN_WMIATB:
24877 case ARM_BUILTIN_WMIATT:
24878 case ARM_BUILTIN_WMIABBN:
24879 case ARM_BUILTIN_WMIABTN:
24880 case ARM_BUILTIN_WMIATBN:
24881 case ARM_BUILTIN_WMIATTN:
24882 case ARM_BUILTIN_WMIAWBB:
24883 case ARM_BUILTIN_WMIAWBT:
24884 case ARM_BUILTIN_WMIAWTB:
24885 case ARM_BUILTIN_WMIAWTT:
24886 case ARM_BUILTIN_WMIAWBBN:
24887 case ARM_BUILTIN_WMIAWBTN:
24888 case ARM_BUILTIN_WMIAWTBN:
24889 case ARM_BUILTIN_WMIAWTTN:
24890 case ARM_BUILTIN_WSADB:
24891 case ARM_BUILTIN_WSADH:
24892 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
24893 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
24894 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
24895 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
24896 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
24897 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
24898 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
24899 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
24900 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
24901 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
24902 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
24903 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
24904 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
24905 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
24906 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
24907 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
24908 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
24909 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
24910 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
24911 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
24912 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
24913 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
24914 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
24915 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
24916 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
24917 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
24918 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
24919 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
24920 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
24921 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
24922 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
24923 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
24924 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
24925 : CODE_FOR_iwmmxt_wsadh);
24926 arg0 = CALL_EXPR_ARG (exp, 0);
24927 arg1 = CALL_EXPR_ARG (exp, 1);
24928 arg2 = CALL_EXPR_ARG (exp, 2);
24929 op0 = expand_normal (arg0);
24930 op1 = expand_normal (arg1);
24931 op2 = expand_normal (arg2);
24932 tmode = insn_data[icode].operand[0].mode;
24933 mode0 = insn_data[icode].operand[1].mode;
24934 mode1 = insn_data[icode].operand[2].mode;
24935 mode2 = insn_data[icode].operand[3].mode;
24937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24938 op0 = copy_to_mode_reg (mode0, op0);
24939 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24940 op1 = copy_to_mode_reg (mode1, op1);
24941 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24942 op2 = copy_to_mode_reg (mode2, op2);
24943 if (target == 0
24944 || GET_MODE (target) != tmode
24945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24946 target = gen_reg_rtx (tmode);
24947 pat = GEN_FCN (icode) (target, op0, op1, op2);
24948 if (! pat)
24949 return 0;
24950 emit_insn (pat);
24951 return target;
24953 case ARM_BUILTIN_WZERO:
24954 target = gen_reg_rtx (DImode);
24955 emit_insn (gen_iwmmxt_clrdi (target));
24956 return target;
24958 case ARM_BUILTIN_WSRLHI:
24959 case ARM_BUILTIN_WSRLWI:
24960 case ARM_BUILTIN_WSRLDI:
24961 case ARM_BUILTIN_WSLLHI:
24962 case ARM_BUILTIN_WSLLWI:
24963 case ARM_BUILTIN_WSLLDI:
24964 case ARM_BUILTIN_WSRAHI:
24965 case ARM_BUILTIN_WSRAWI:
24966 case ARM_BUILTIN_WSRADI:
24967 case ARM_BUILTIN_WRORHI:
24968 case ARM_BUILTIN_WRORWI:
24969 case ARM_BUILTIN_WRORDI:
24970 case ARM_BUILTIN_WSRLH:
24971 case ARM_BUILTIN_WSRLW:
24972 case ARM_BUILTIN_WSRLD:
24973 case ARM_BUILTIN_WSLLH:
24974 case ARM_BUILTIN_WSLLW:
24975 case ARM_BUILTIN_WSLLD:
24976 case ARM_BUILTIN_WSRAH:
24977 case ARM_BUILTIN_WSRAW:
24978 case ARM_BUILTIN_WSRAD:
24979 case ARM_BUILTIN_WRORH:
24980 case ARM_BUILTIN_WRORW:
24981 case ARM_BUILTIN_WRORD:
24982 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
24983 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
24984 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
24985 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
24986 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
24987 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
24988 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
24989 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
24990 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
24991 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
24992 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
24993 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
24994 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
24995 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
24996 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
24997 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
24998 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
24999 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25000 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25001 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25002 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25003 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25004 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25005 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25006 : CODE_FOR_nothing);
25007 arg1 = CALL_EXPR_ARG (exp, 1);
25008 op1 = expand_normal (arg1);
25009 if (GET_MODE (op1) == VOIDmode)
25011 imm = INTVAL (op1);
25012 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25013 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25014 && (imm < 0 || imm > 32))
25016 if (fcode == ARM_BUILTIN_WRORHI)
25017 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25018 else if (fcode == ARM_BUILTIN_WRORWI)
25019 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25020 else if (fcode == ARM_BUILTIN_WRORH)
25021 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25022 else
25023 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25025 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25026 && (imm < 0 || imm > 64))
25028 if (fcode == ARM_BUILTIN_WRORDI)
25029 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25030 else
25031 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25033 else if (imm < 0)
25035 if (fcode == ARM_BUILTIN_WSRLHI)
25036 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25037 else if (fcode == ARM_BUILTIN_WSRLWI)
25038 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25039 else if (fcode == ARM_BUILTIN_WSRLDI)
25040 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25041 else if (fcode == ARM_BUILTIN_WSLLHI)
25042 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25043 else if (fcode == ARM_BUILTIN_WSLLWI)
25044 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25045 else if (fcode == ARM_BUILTIN_WSLLDI)
25046 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25047 else if (fcode == ARM_BUILTIN_WSRAHI)
25048 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25049 else if (fcode == ARM_BUILTIN_WSRAWI)
25050 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25051 else if (fcode == ARM_BUILTIN_WSRADI)
25052 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25053 else if (fcode == ARM_BUILTIN_WSRLH)
25054 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25055 else if (fcode == ARM_BUILTIN_WSRLW)
25056 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25057 else if (fcode == ARM_BUILTIN_WSRLD)
25058 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25059 else if (fcode == ARM_BUILTIN_WSLLH)
25060 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25061 else if (fcode == ARM_BUILTIN_WSLLW)
25062 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25063 else if (fcode == ARM_BUILTIN_WSLLD)
25064 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25065 else if (fcode == ARM_BUILTIN_WSRAH)
25066 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25067 else if (fcode == ARM_BUILTIN_WSRAW)
25068 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25069 else
25070 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25073 return arm_expand_binop_builtin (icode, exp, target);
25075 default:
25076 break;
25079 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25080 if (d->code == (const enum arm_builtins) fcode)
25081 return arm_expand_binop_builtin (d->icode, exp, target);
25083 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25084 if (d->code == (const enum arm_builtins) fcode)
25085 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25087 /* @@@ Should really do something sensible here. */
25088 return NULL_RTX;
25091 /* Return the number (counting from 0) of
25092 the least significant set bit in MASK. */
25094 inline static int
25095 number_of_first_bit_set (unsigned mask)
25097 return ctz_hwi (mask);
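/* For illustration: number_of_first_bit_set (0x0c) == 2, since bit 2 is
   the least significant set bit of 0b1100; the function is simply a
   wrapper around the count-trailing-zeros helper ctz_hwi.  */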
25100 /* Like emit_multi_reg_push, but allowing for a different set of
25101 registers to be described as saved. MASK is the set of registers
25102 to be saved; REAL_REGS is the set of registers to be described as
25103 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25105 static rtx
25106 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25108 unsigned long regno;
25109 rtx par[10], tmp, reg, insn;
25110 int i, j;
25112 /* Build the parallel of the registers actually being stored. */
25113 for (i = 0; mask; ++i, mask &= mask - 1)
25115 regno = ctz_hwi (mask);
25116 reg = gen_rtx_REG (SImode, regno);
25118 if (i == 0)
25119 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25120 else
25121 tmp = gen_rtx_USE (VOIDmode, reg);
25123 par[i] = tmp;
25126 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25127 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25128 tmp = gen_frame_mem (BLKmode, tmp);
25129 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25130 par[0] = tmp;
25132 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25133 insn = emit_insn (tmp);
25135 /* Always build the stack adjustment note for unwind info. */
25136 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25137 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25138 par[0] = tmp;
25140 /* Build the parallel of the registers recorded as saved for unwind. */
25141 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25143 regno = ctz_hwi (real_regs);
25144 reg = gen_rtx_REG (SImode, regno);
25146 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25147 tmp = gen_frame_mem (SImode, tmp);
25148 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25149 RTX_FRAME_RELATED_P (tmp) = 1;
25150 par[j + 1] = tmp;
25153 if (j == 0)
25154 tmp = par[0];
25155 else
25157 RTX_FRAME_RELATED_P (par[0]) = 1;
25158 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25161 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25163 return insn;
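/* For illustration, assuming mask == real_regs == {r4, r5, lr}, the emitted
   insn is roughly

     (parallel
       [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) (const_int -12))))
             (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
        (use (reg r5))
        (use (reg lr))])

   together with a REG_FRAME_RELATED_EXPR note that records the SP decrement
   and one frame-related store per register in REAL_REGS for the unwinder.  */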
25166 /* Emit code to push or pop registers to or from the stack. F is the
25167 assembly file. MASK is the registers to pop. */
25168 static void
25169 thumb_pop (FILE *f, unsigned long mask)
25171 int regno;
25172 int lo_mask = mask & 0xFF;
25173 int pushed_words = 0;
25175 gcc_assert (mask);
25177 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25179 /* Special case. Do not generate a POP PC statement here, do it in
25180 thumb_exit (). */
25181 thumb_exit (f, -1);
25182 return;
25185 fprintf (f, "\tpop\t{");
25187 /* Look at the low registers first. */
25188 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25190 if (lo_mask & 1)
25192 asm_fprintf (f, "%r", regno);
25194 if ((lo_mask & ~1) != 0)
25195 fprintf (f, ", ");
25197 pushed_words++;
25201 if (mask & (1 << PC_REGNUM))
25203 /* Catch popping the PC. */
25204 if (TARGET_INTERWORK || TARGET_BACKTRACE
25205 || crtl->calls_eh_return)
25207 /* The PC is never popped directly, instead
25208 it is popped into r3 and then BX is used. */
25209 fprintf (f, "}\n");
25211 thumb_exit (f, -1);
25213 return;
25215 else
25217 if (mask & 0xFF)
25218 fprintf (f, ", ");
25220 asm_fprintf (f, "%r", PC_REGNUM);
25224 fprintf (f, "}\n");
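/* For illustration: with mask = r4|r5|r7 this routine prints
   "\tpop\t{r4, r5, r7}\n".  If the PC is in the mask but interworking,
   backtracing or an EH return is involved, the pop is closed without the
   PC and thumb_exit emits the BX-based return sequence instead.  */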
25227 /* Generate code to return from a thumb function.
25228 If 'reg_containing_return_addr' is -1, then the return address is
25229 actually on the stack, at the stack pointer. */
25230 static void
25231 thumb_exit (FILE *f, int reg_containing_return_addr)
25233 unsigned regs_available_for_popping;
25234 unsigned regs_to_pop;
25235 int pops_needed;
25236 unsigned available;
25237 unsigned required;
25238 int mode;
25239 int size;
25240 int restore_a4 = FALSE;
25242 /* Compute the registers we need to pop. */
25243 regs_to_pop = 0;
25244 pops_needed = 0;
25246 if (reg_containing_return_addr == -1)
25248 regs_to_pop |= 1 << LR_REGNUM;
25249 ++pops_needed;
25252 if (TARGET_BACKTRACE)
25254 /* Restore the (ARM) frame pointer and stack pointer. */
25255 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25256 pops_needed += 2;
25259 /* If there is nothing to pop then just emit the BX instruction and
25260 return. */
25261 if (pops_needed == 0)
25263 if (crtl->calls_eh_return)
25264 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25266 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25267 return;
25269 /* Otherwise if we are not supporting interworking and we have not created
25270 a backtrace structure and the function was not entered in ARM mode then
25271 just pop the return address straight into the PC. */
25272 else if (!TARGET_INTERWORK
25273 && !TARGET_BACKTRACE
25274 && !is_called_in_ARM_mode (current_function_decl)
25275 && !crtl->calls_eh_return)
25277 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25278 return;
25281 /* Find out how many of the (return) argument registers we can corrupt. */
25282 regs_available_for_popping = 0;
25284 /* If returning via __builtin_eh_return, the bottom three registers
25285 all contain information needed for the return. */
25286 if (crtl->calls_eh_return)
25287 size = 12;
25288 else
25290 /* We can deduce the registers used from the function's
25291 return value. This is more reliable than examining
25292 df_regs_ever_live_p () because that will be set if the register is
25293 ever used in the function, not just if the register is used
25294 to hold a return value. */
25296 if (crtl->return_rtx != 0)
25297 mode = GET_MODE (crtl->return_rtx);
25298 else
25299 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25301 size = GET_MODE_SIZE (mode);
25303 if (size == 0)
25305 /* In a void function we can use any argument register.
25306 In a function that returns a structure on the stack
25307 we can use the second and third argument registers. */
25308 if (mode == VOIDmode)
25309 regs_available_for_popping =
25310 (1 << ARG_REGISTER (1))
25311 | (1 << ARG_REGISTER (2))
25312 | (1 << ARG_REGISTER (3));
25313 else
25314 regs_available_for_popping =
25315 (1 << ARG_REGISTER (2))
25316 | (1 << ARG_REGISTER (3));
25318 else if (size <= 4)
25319 regs_available_for_popping =
25320 (1 << ARG_REGISTER (2))
25321 | (1 << ARG_REGISTER (3));
25322 else if (size <= 8)
25323 regs_available_for_popping =
25324 (1 << ARG_REGISTER (3));
25327 /* Match registers to be popped with registers into which we pop them. */
25328 for (available = regs_available_for_popping,
25329 required = regs_to_pop;
25330 required != 0 && available != 0;
25331 available &= ~(available & - available),
25332 required &= ~(required & - required))
25333 -- pops_needed;
25335 /* If we have any popping registers left over, remove them. */
25336 if (available > 0)
25337 regs_available_for_popping &= ~available;
25339 /* Otherwise if we need another popping register we can use
25340 the fourth argument register. */
25341 else if (pops_needed)
25343 /* If we have not found any free argument registers and
25344 reg a4 contains the return address, we must move it. */
25345 if (regs_available_for_popping == 0
25346 && reg_containing_return_addr == LAST_ARG_REGNUM)
25348 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25349 reg_containing_return_addr = LR_REGNUM;
25351 else if (size > 12)
25353 /* Register a4 is being used to hold part of the return value,
25354 but we have dire need of a free, low register. */
25355 restore_a4 = TRUE;
25357 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25360 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25362 /* The fourth argument register is available. */
25363 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25365 --pops_needed;
25369 /* Pop as many registers as we can. */
25370 thumb_pop (f, regs_available_for_popping);
25372 /* Process the registers we popped. */
25373 if (reg_containing_return_addr == -1)
25375 /* The return address was popped into the lowest numbered register. */
25376 regs_to_pop &= ~(1 << LR_REGNUM);
25378 reg_containing_return_addr =
25379 number_of_first_bit_set (regs_available_for_popping);
25381 /* Remove this register from the mask of available registers, so that
25382 the return address will not be corrupted by further pops. */
25383 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25386 /* If we popped other registers then handle them here. */
25387 if (regs_available_for_popping)
25389 int frame_pointer;
25391 /* Work out which register currently contains the frame pointer. */
25392 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25394 /* Move it into the correct place. */
25395 asm_fprintf (f, "\tmov\t%r, %r\n",
25396 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25398 /* (Temporarily) remove it from the mask of popped registers. */
25399 regs_available_for_popping &= ~(1 << frame_pointer);
25400 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25402 if (regs_available_for_popping)
25404 int stack_pointer;
25406 /* We popped the stack pointer as well,
25407 find the register that contains it. */
25408 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25410 /* Move it into the stack register. */
25411 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25413 /* At this point we have popped all necessary registers, so
25414 do not worry about restoring regs_available_for_popping
25415 to its correct value:
25417 assert (pops_needed == 0)
25418 assert (regs_available_for_popping == (1 << frame_pointer))
25419 assert (regs_to_pop == (1 << STACK_POINTER)) */
25421 else
25423 /* Since we have just moved the popped value into the frame
25424 pointer, the popping register is available for reuse, and
25425 we know that we still have the stack pointer left to pop. */
25426 regs_available_for_popping |= (1 << frame_pointer);
25430 /* If we still have registers left on the stack, but we no longer have
25431 any registers into which we can pop them, then we must move the return
25432 address into the link register and make available the register that
25433 contained it. */
25434 if (regs_available_for_popping == 0 && pops_needed > 0)
25436 regs_available_for_popping |= 1 << reg_containing_return_addr;
25438 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
25439 reg_containing_return_addr);
25441 reg_containing_return_addr = LR_REGNUM;
25444 /* If we have registers left on the stack then pop some more.
25445 We know that at most we will want to pop FP and SP. */
25446 if (pops_needed > 0)
25448 int popped_into;
25449 int move_to;
25451 thumb_pop (f, regs_available_for_popping);
25453 /* We have popped either FP or SP.
25454 Move whichever one it is into the correct register. */
25455 popped_into = number_of_first_bit_set (regs_available_for_popping);
25456 move_to = number_of_first_bit_set (regs_to_pop);
25458 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
25460 regs_to_pop &= ~(1 << move_to);
25462 --pops_needed;
25465 /* If we still have not popped everything then we must have only
25466 had one register available to us and we are now popping the SP. */
25467 if (pops_needed > 0)
25469 int popped_into;
25471 thumb_pop (f, regs_available_for_popping);
25473 popped_into = number_of_first_bit_set (regs_available_for_popping);
25475 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
25477 assert (regs_to_pop == (1 << STACK_POINTER))
25478 assert (pops_needed == 1)
25482 /* If necessary restore the a4 register. */
25483 if (restore_a4)
25485 if (reg_containing_return_addr != LR_REGNUM)
25487 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25488 reg_containing_return_addr = LR_REGNUM;
25491 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
25494 if (crtl->calls_eh_return)
25495 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25497 /* Return to caller. */
25498 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
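/* For illustration: in the simplest case -- no interworking, no backtrace,
   return address on the stack -- the function emits just "pop {pc}".  With
   interworking it instead pops the return address into a free argument
   register and returns with BX, e.g. "pop {r3}" followed by "bx r3" when
   r3 happens to be the register chosen above (the exact choice depends on
   the size of the return value computed earlier).  */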
25501 /* Scan INSN just before assembler is output for it.
25502 For Thumb-1, we track the status of the condition codes; this
25503 information is used in the cbranchsi4_insn pattern. */
25504 void
25505 thumb1_final_prescan_insn (rtx insn)
25507 if (flag_print_asm_name)
25508 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25509 INSN_ADDRESSES (INSN_UID (insn)));
25510 /* Don't overwrite the previous setter when we get to a cbranch. */
25511 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25513 enum attr_conds conds;
25515 if (cfun->machine->thumb1_cc_insn)
25517 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25518 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25519 CC_STATUS_INIT;
25521 conds = get_attr_conds (insn);
25522 if (conds == CONDS_SET)
25524 rtx set = single_set (insn);
25525 cfun->machine->thumb1_cc_insn = insn;
25526 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25527 cfun->machine->thumb1_cc_op1 = const0_rtx;
25528 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25529 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25531 rtx src1 = XEXP (SET_SRC (set), 1);
25532 if (src1 == const0_rtx)
25533 cfun->machine->thumb1_cc_mode = CCmode;
25535 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25537 /* Record the src register operand instead of dest because
25538 cprop_hardreg pass propagates src. */
25539 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25542 else if (conds != CONDS_NOCOND)
25543 cfun->machine->thumb1_cc_insn = NULL_RTX;
25546 /* Check if unexpected far jump is used. */
25547 if (cfun->machine->lr_save_eliminated
25548 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25549 internal_error ("Unexpected thumb1 far jump");
25552 int
25553 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25555 unsigned HOST_WIDE_INT mask = 0xff;
25556 int i;
25558 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
25559 if (val == 0) /* XXX */
25560 return 0;
25562 for (i = 0; i < 25; i++)
25563 if ((val & (mask << i)) == val)
25564 return 1;
25566 return 0;
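/* For illustration: the loop above asks whether the low 32 bits of VAL fit
   in a contiguous 8-bit field shifted left by 0..24 places.  For instance
   0x00ff0000 qualifies (0xff << 16), as does 0x3fc (0xff << 2), while
   0x00ff00ff does not, because its set bits span more than one such
   8-bit window.  */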
25569 /* Returns nonzero if the current function contains,
25570 or might contain a far jump. */
25571 static int
25572 thumb_far_jump_used_p (void)
25574 rtx insn;
25575 bool far_jump = false;
25576 unsigned int func_size = 0;
25578 /* This test is only important for leaf functions. */
25579 /* assert (!leaf_function_p ()); */
25581 /* If we have already decided that far jumps may be used,
25582 do not bother checking again, and always return true even if
25583 it turns out that they are not being used. Once we have made
25584 the decision that far jumps are present (and that hence the link
25585 register will be pushed onto the stack) we cannot go back on it. */
25586 if (cfun->machine->far_jump_used)
25587 return 1;
25589 /* If this function is not being called from the prologue/epilogue
25590 generation code then it must be being called from the
25591 INITIAL_ELIMINATION_OFFSET macro. */
25592 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25594 /* In this case we know that we are being asked about the elimination
25595 of the arg pointer register. If that register is not being used,
25596 then there are no arguments on the stack, and we do not have to
25597 worry that a far jump might force the prologue to push the link
25598 register, changing the stack offsets. In this case we can just
25599 return false, since the presence of far jumps in the function will
25600 not affect stack offsets.
25602 If the arg pointer is live (or if it was live, but has now been
25603 eliminated and so set to dead) then we do have to test to see if
25604 the function might contain a far jump. This test can lead to some
25605 false negatives, since before reload is completed, the length of
25606 branch instructions is not known, so gcc defaults to returning their
25607 longest length, which in turn sets the far jump attribute to true.
25609 A false negative will not result in bad code being generated, but it
25610 will result in a needless push and pop of the link register. We
25611 hope that this does not occur too often.
25613 If we need doubleword stack alignment this could affect the other
25614 elimination offsets so we can't risk getting it wrong. */
25615 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25616 cfun->machine->arg_pointer_live = 1;
25617 else if (!cfun->machine->arg_pointer_live)
25618 return 0;
25621 /* Check to see if the function contains a branch
25622 insn with the far jump attribute set. */
25623 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25625 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25627 far_jump = true;
25629 func_size += get_attr_length (insn);
25632 /* The far_jump attribute is always true for thumb1 before the
25633 shorten_branch pass, so checking the far_jump attribute before
25634 shorten_branch isn't very useful.
25636 The following heuristic tries to estimate more accurately whether a far
25637 jump will actually be used. The heuristic is very conservative, as there
25638 is no chance to roll back a decision not to use far jumps.
25640 The Thumb1 long branch offset is -2048 to 2046. The worst case is that
25641 each 2-byte insn is associated with a 4-byte constant pool entry. Using
25642 function size 2048/3 as the threshold is conservative enough. */
25643 if (far_jump)
25645 if ((func_size * 3) >= 2048)
25647 /* Record the fact that we have decided that
25648 the function does use far jumps. */
25649 cfun->machine->far_jump_used = 1;
25650 return 1;
25654 return 0;
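/* For illustration of the threshold above: func_size * 3 >= 2048
   corresponds to roughly 683 bytes of instructions.  Under the stated
   worst-case assumption (a 4-byte constant pool entry per 2-byte insn)
   such a body could occupy about 2048 bytes, which is where the ordinary
   Thumb-1 branch range of -2048..2046 may no longer reach, so a far jump
   becomes plausible and LR is conservatively kept saved.  */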
25657 /* Return nonzero if FUNC must be entered in ARM mode. */
25658 int
25659 is_called_in_ARM_mode (tree func)
25661 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25663 /* Ignore the problem about functions whose address is taken. */
25664 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25665 return TRUE;
25667 #ifdef ARM_PE
25668 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25669 #else
25670 return FALSE;
25671 #endif
25674 /* Given the stack offsets and register mask in OFFSETS, decide how
25675 many additional registers to push instead of subtracting a constant
25676 from SP. For epilogues the principle is the same except we use pop.
25677 FOR_PROLOGUE indicates which we're generating. */
25678 static int
25679 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25681 HOST_WIDE_INT amount;
25682 unsigned long live_regs_mask = offsets->saved_regs_mask;
25683 /* Extract a mask of the ones we can give to the Thumb's push/pop
25684 instruction. */
25685 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25686 /* Then count how many other high registers will need to be pushed. */
25687 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25688 int n_free, reg_base, size;
25690 if (!for_prologue && frame_pointer_needed)
25691 amount = offsets->locals_base - offsets->saved_regs;
25692 else
25693 amount = offsets->outgoing_args - offsets->saved_regs;
25695 /* If the stack frame size is 512 exactly, we can save one load
25696 instruction, which should make this a win even when optimizing
25697 for speed. */
25698 if (!optimize_size && amount != 512)
25699 return 0;
25701 /* Can't do this if there are high registers to push. */
25702 if (high_regs_pushed != 0)
25703 return 0;
25705 /* Shouldn't do it in the prologue if no registers would normally
25706 be pushed at all. In the epilogue, also allow it if we'll have
25707 a pop insn for the PC. */
25708 if (l_mask == 0
25709 && (for_prologue
25710 || TARGET_BACKTRACE
25711 || (live_regs_mask & 1 << LR_REGNUM) == 0
25712 || TARGET_INTERWORK
25713 || crtl->args.pretend_args_size != 0))
25714 return 0;
25716 /* Don't do this if thumb_expand_prologue wants to emit instructions
25717 between the push and the stack frame allocation. */
25718 if (for_prologue
25719 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25720 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25721 return 0;
25723 reg_base = 0;
25724 n_free = 0;
25725 if (!for_prologue)
25727 size = arm_size_return_regs ();
25728 reg_base = ARM_NUM_INTS (size);
25729 live_regs_mask >>= reg_base;
25732 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25733 && (for_prologue || call_used_regs[reg_base + n_free]))
25735 live_regs_mask >>= 1;
25736 n_free++;
25739 if (n_free == 0)
25740 return 0;
25741 gcc_assert (amount / 4 * 4 == amount);
25743 if (amount >= 512 && (amount - n_free * 4) < 512)
25744 return (amount - 508) / 4;
25745 if (amount <= n_free * 4)
25746 return amount / 4;
25747 return 0;
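/* For illustration, assuming -Os so the amount != 512 early exit does not
   trigger: if the frame needs 16 bytes and four low registers are free,
   the function returns 4 and the prologue pushes four scratch registers
   instead of emitting "sub sp, #16".  If amount == 512 and one register is
   free, it returns (512 - 508) / 4 == 1, just enough to bring the
   remaining adjustment back under the single-instruction limit.  */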
25750 /* The bits which aren't usefully expanded as rtl. */
25751 const char *
25752 thumb1_unexpanded_epilogue (void)
25754 arm_stack_offsets *offsets;
25755 int regno;
25756 unsigned long live_regs_mask = 0;
25757 int high_regs_pushed = 0;
25758 int extra_pop;
25759 int had_to_push_lr;
25760 int size;
25762 if (cfun->machine->return_used_this_function != 0)
25763 return "";
25765 if (IS_NAKED (arm_current_func_type ()))
25766 return "";
25768 offsets = arm_get_frame_offsets ();
25769 live_regs_mask = offsets->saved_regs_mask;
25770 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25772 /* We can deduce the registers used from the function's return value.
25773 This is more reliable than examining df_regs_ever_live_p () because that
25774 will be set if the register is ever used in the function, not just if
25775 the register is used to hold a return value. */
25776 size = arm_size_return_regs ();
25778 extra_pop = thumb1_extra_regs_pushed (offsets, false);
25779 if (extra_pop > 0)
25781 unsigned long extra_mask = (1 << extra_pop) - 1;
25782 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
25785 /* The prologue may have pushed some high registers to use as
25786 work registers. E.g. the testsuite file:
25787 gcc/testsuite/gcc.c-torture/execute/complex-2.c
25788 compiles to produce:
25789 push {r4, r5, r6, r7, lr}
25790 mov r7, r9
25791 mov r6, r8
25792 push {r6, r7}
25793 as part of the prologue. We have to undo that pushing here. */
25795 if (high_regs_pushed)
25797 unsigned long mask = live_regs_mask & 0xff;
25798 int next_hi_reg;
25800 /* The available low registers depend on the size of the value we are
25801 returning. */
25802 if (size <= 12)
25803 mask |= 1 << 3;
25804 if (size <= 8)
25805 mask |= 1 << 2;
25807 if (mask == 0)
25808 /* Oh dear! We have no low registers into which we can pop
25809 high registers! */
25810 internal_error
25811 ("no low registers available for popping high registers");
25813 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
25814 if (live_regs_mask & (1 << next_hi_reg))
25815 break;
25817 while (high_regs_pushed)
25819 /* Find lo register(s) into which the high register(s) can
25820 be popped. */
25821 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
25823 if (mask & (1 << regno))
25824 high_regs_pushed--;
25825 if (high_regs_pushed == 0)
25826 break;
25829 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
25831 /* Pop the values into the low register(s). */
25832 thumb_pop (asm_out_file, mask);
25834 /* Move the value(s) into the high registers. */
25835 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
25837 if (mask & (1 << regno))
25839 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
25840 regno);
25842 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
25843 if (live_regs_mask & (1 << next_hi_reg))
25844 break;
25848 live_regs_mask &= ~0x0f00;
25851 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
25852 live_regs_mask &= 0xff;
25854 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
25856 /* Pop the return address into the PC. */
25857 if (had_to_push_lr)
25858 live_regs_mask |= 1 << PC_REGNUM;
25860 /* Either no argument registers were pushed or a backtrace
25861 structure was created which includes an adjusted stack
25862 pointer, so just pop everything. */
25863 if (live_regs_mask)
25864 thumb_pop (asm_out_file, live_regs_mask);
25866 /* We have either just popped the return address into the
25867 PC or it was kept in LR for the entire function.
25868 Note that thumb_pop has already called thumb_exit if the
25869 PC was in the list. */
25870 if (!had_to_push_lr)
25871 thumb_exit (asm_out_file, LR_REGNUM);
25873 else
25875 /* Pop everything but the return address. */
25876 if (live_regs_mask)
25877 thumb_pop (asm_out_file, live_regs_mask);
25879 if (had_to_push_lr)
25881 if (size > 12)
25883 /* We have no free low regs, so save one. */
25884 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
25885 LAST_ARG_REGNUM);
25888 /* Get the return address into a temporary register. */
25889 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
25891 if (size > 12)
25893 /* Move the return address to lr. */
25894 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
25895 LAST_ARG_REGNUM);
25896 /* Restore the low register. */
25897 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25898 IP_REGNUM);
25899 regno = LR_REGNUM;
25901 else
25902 regno = LAST_ARG_REGNUM;
25904 else
25905 regno = LR_REGNUM;
25907 /* Remove the argument registers that were pushed onto the stack. */
25908 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25909 SP_REGNUM, SP_REGNUM,
25910 crtl->args.pretend_args_size);
25912 thumb_exit (asm_out_file, regno);
25915 return "";
25918 /* Functions to save and restore machine-specific function data. */
25919 static struct machine_function *
25920 arm_init_machine_status (void)
25922 struct machine_function *machine;
25923 machine = ggc_alloc_cleared_machine_function ();
25925 #if ARM_FT_UNKNOWN != 0
25926 machine->func_type = ARM_FT_UNKNOWN;
25927 #endif
25928 return machine;
25931 /* Return an RTX indicating where the return address to the
25932 calling function can be found. */
25933 rtx
25934 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25936 if (count != 0)
25937 return NULL_RTX;
25939 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25942 /* Do anything needed before RTL is emitted for each function. */
25943 void
25944 arm_init_expanders (void)
25946 /* Arrange to initialize and mark the machine per-function status. */
25947 init_machine_status = arm_init_machine_status;
25949 /* This is to stop the combine pass optimizing away the alignment
25950 adjustment of va_arg. */
25951 /* ??? It is claimed that this should not be necessary. */
25952 if (cfun)
25953 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25957 /* Like arm_compute_initial_elimination_offset. Simpler because there
25958 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25959 to point at the base of the local variables after static stack
25960 space for a function has been allocated. */
25962 HOST_WIDE_INT
25963 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25965 arm_stack_offsets *offsets;
25967 offsets = arm_get_frame_offsets ();
25969 switch (from)
25971 case ARG_POINTER_REGNUM:
25972 switch (to)
25974 case STACK_POINTER_REGNUM:
25975 return offsets->outgoing_args - offsets->saved_args;
25977 case FRAME_POINTER_REGNUM:
25978 return offsets->soft_frame - offsets->saved_args;
25980 case ARM_HARD_FRAME_POINTER_REGNUM:
25981 return offsets->saved_regs - offsets->saved_args;
25983 case THUMB_HARD_FRAME_POINTER_REGNUM:
25984 return offsets->locals_base - offsets->saved_args;
25986 default:
25987 gcc_unreachable ();
25989 break;
25991 case FRAME_POINTER_REGNUM:
25992 switch (to)
25994 case STACK_POINTER_REGNUM:
25995 return offsets->outgoing_args - offsets->soft_frame;
25997 case ARM_HARD_FRAME_POINTER_REGNUM:
25998 return offsets->saved_regs - offsets->soft_frame;
26000 case THUMB_HARD_FRAME_POINTER_REGNUM:
26001 return offsets->locals_base - offsets->soft_frame;
26003 default:
26004 gcc_unreachable ();
26006 break;
26008 default:
26009 gcc_unreachable ();
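/* For illustration: the eliminations above are plain differences of the
   cached frame offsets.  E.g. eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields offsets->outgoing_args - offsets->saved_args,
   so an address of the form "arg_pointer + 8" is rewritten as
   "sp + (outgoing_args - saved_args) + 8" once elimination runs.  */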
26013 /* Generate the function's prologue. */
26015 void
26016 thumb1_expand_prologue (void)
26018 rtx insn;
26020 HOST_WIDE_INT amount;
26021 arm_stack_offsets *offsets;
26022 unsigned long func_type;
26023 int regno;
26024 unsigned long live_regs_mask;
26025 unsigned long l_mask;
26026 unsigned high_regs_pushed = 0;
26028 func_type = arm_current_func_type ();
26030 /* Naked functions don't have prologues. */
26031 if (IS_NAKED (func_type))
26032 return;
26034 if (IS_INTERRUPT (func_type))
26036 error ("interrupt Service Routines cannot be coded in Thumb mode");
26037 return;
26040 if (is_called_in_ARM_mode (current_function_decl))
26041 emit_insn (gen_prologue_thumb1_interwork ());
26043 offsets = arm_get_frame_offsets ();
26044 live_regs_mask = offsets->saved_regs_mask;
26046 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26047 l_mask = live_regs_mask & 0x40ff;
26048 /* Then count how many other high registers will need to be pushed. */
26049 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26051 if (crtl->args.pretend_args_size)
26053 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26055 if (cfun->machine->uses_anonymous_args)
26057 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26058 unsigned long mask;
26060 mask = 1ul << (LAST_ARG_REGNUM + 1);
26061 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26063 insn = thumb1_emit_multi_reg_push (mask, 0);
26065 else
26067 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26068 stack_pointer_rtx, x));
26070 RTX_FRAME_RELATED_P (insn) = 1;
26073 if (TARGET_BACKTRACE)
26075 HOST_WIDE_INT offset = 0;
26076 unsigned work_register;
26077 rtx work_reg, x, arm_hfp_rtx;
26079 /* We have been asked to create a stack backtrace structure.
26080 The code looks like this:
26082 0 .align 2
26083 0 func:
26084 0 sub SP, #16 Reserve space for 4 registers.
26085 2 push {R7} Push low registers.
26086 4 add R7, SP, #20 Get the stack pointer before the push.
26087 6 str R7, [SP, #8] Store the stack pointer
26088 (before reserving the space).
26089 8 mov R7, PC Get hold of the start of this code + 12.
26090 10 str R7, [SP, #16] Store it.
26091 12 mov R7, FP Get hold of the current frame pointer.
26092 14 str R7, [SP, #4] Store it.
26093 16 mov R7, LR Get hold of the current return address.
26094 18 str R7, [SP, #12] Store it.
26095 20 add R7, SP, #16 Point at the start of the
26096 backtrace structure.
26097 22 mov FP, R7 Put this value into the frame pointer. */
26099 work_register = thumb_find_work_register (live_regs_mask);
26100 work_reg = gen_rtx_REG (SImode, work_register);
26101 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26103 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26104 stack_pointer_rtx, GEN_INT (-16)));
26105 RTX_FRAME_RELATED_P (insn) = 1;
26107 if (l_mask)
26109 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26110 RTX_FRAME_RELATED_P (insn) = 1;
26112 offset = bit_count (l_mask) * UNITS_PER_WORD;
26115 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26116 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26118 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26119 x = gen_frame_mem (SImode, x);
26120 emit_move_insn (x, work_reg);
26122 /* Make sure that the instruction fetching the PC is in the right place
26123 to calculate "start of backtrace creation code + 12". */
26124 /* ??? The stores using the common WORK_REG ought to be enough to
26125 prevent the scheduler from doing anything weird. Failing that
26126 we could always move all of the following into an UNSPEC_VOLATILE. */
26127 if (l_mask)
26129 x = gen_rtx_REG (SImode, PC_REGNUM);
26130 emit_move_insn (work_reg, x);
26132 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26133 x = gen_frame_mem (SImode, x);
26134 emit_move_insn (x, work_reg);
26136 emit_move_insn (work_reg, arm_hfp_rtx);
26138 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26139 x = gen_frame_mem (SImode, x);
26140 emit_move_insn (x, work_reg);
26142 else
26144 emit_move_insn (work_reg, arm_hfp_rtx);
26146 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26147 x = gen_frame_mem (SImode, x);
26148 emit_move_insn (x, work_reg);
26150 x = gen_rtx_REG (SImode, PC_REGNUM);
26151 emit_move_insn (work_reg, x);
26153 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26154 x = gen_frame_mem (SImode, x);
26155 emit_move_insn (x, work_reg);
26158 x = gen_rtx_REG (SImode, LR_REGNUM);
26159 emit_move_insn (work_reg, x);
26161 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26162 x = gen_frame_mem (SImode, x);
26163 emit_move_insn (x, work_reg);
26165 x = GEN_INT (offset + 12);
26166 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26168 emit_move_insn (arm_hfp_rtx, work_reg);
26170 /* Optimization: If we are not pushing any low registers but we are going
26171 to push some high registers then delay our first push. This will just
26172 be a push of LR and we can combine it with the push of the first high
26173 register. */
26174 else if ((l_mask & 0xff) != 0
26175 || (high_regs_pushed == 0 && l_mask))
26177 unsigned long mask = l_mask;
26178 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26179 insn = thumb1_emit_multi_reg_push (mask, mask);
26180 RTX_FRAME_RELATED_P (insn) = 1;
26183 if (high_regs_pushed)
26185 unsigned pushable_regs;
26186 unsigned next_hi_reg;
26187 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26188 : crtl->args.info.nregs;
26189 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26191 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26192 if (live_regs_mask & (1 << next_hi_reg))
26193 break;
26195 /* Here we need to mask out registers used for passing arguments,
26196 even if they could be pushed. This avoids using them to stash the high
26197 registers, which could clobber the arguments they hold. */
26198 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26200 if (pushable_regs == 0)
26201 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26203 while (high_regs_pushed > 0)
26205 unsigned long real_regs_mask = 0;
26207 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26209 if (pushable_regs & (1 << regno))
26211 emit_move_insn (gen_rtx_REG (SImode, regno),
26212 gen_rtx_REG (SImode, next_hi_reg));
26214 high_regs_pushed --;
26215 real_regs_mask |= (1 << next_hi_reg);
26217 if (high_regs_pushed)
26219 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26220 next_hi_reg --)
26221 if (live_regs_mask & (1 << next_hi_reg))
26222 break;
26224 else
26226 pushable_regs &= ~((1 << regno) - 1);
26227 break;
26232 /* If we had to find a work register and we have not yet
26233 saved the LR then add it to the list of regs to push. */
26234 if (l_mask == (1 << LR_REGNUM))
26236 pushable_regs |= l_mask;
26237 real_regs_mask |= l_mask;
26238 l_mask = 0;
26241 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26242 RTX_FRAME_RELATED_P (insn) = 1;
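/* For illustration, assuming r8 and r9 are live and r6/r7 are available as
   staging registers: Thumb-1 PUSH cannot name high registers directly, so
   the loop above stages them through free low registers and emits roughly
       mov  r7, r9
       mov  r6, r8
       push {r6, r7}
   i.e. the pattern that thumb1_unexpanded_epilogue later undoes.  */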
26246 /* Load the pic register before setting the frame pointer,
26247 so we can use r7 as a temporary work register. */
26248 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26249 arm_load_pic_register (live_regs_mask);
26251 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26252 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26253 stack_pointer_rtx);
26255 if (flag_stack_usage_info)
26256 current_function_static_stack_size
26257 = offsets->outgoing_args - offsets->saved_args;
26259 amount = offsets->outgoing_args - offsets->saved_regs;
26260 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26261 if (amount)
26263 if (amount < 512)
26265 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26266 GEN_INT (- amount)));
26267 RTX_FRAME_RELATED_P (insn) = 1;
26269 else
26271 rtx reg, dwarf;
26273 /* The stack decrement is too big for an immediate value in a single
26274 insn. In theory we could issue multiple subtracts, but after
26275 three of them it becomes more space efficient to place the full
26276 value in the constant pool and load into a register. (Also the
26277 ARM debugger really likes to see only one stack decrement per
26278 function). So instead we look for a scratch register into which
26279 we can load the decrement, and then we subtract this from the
26280 stack pointer. Unfortunately on the thumb the only available
26281 scratch registers are the argument registers, and we cannot use
26282 these as they may hold arguments to the function. Instead we
26283 attempt to locate a call preserved register which is used by this
26284 function. If we can find one, then we know that it will have
26285 been pushed at the start of the prologue and so we can corrupt
26286 it now. */
26287 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26288 if (live_regs_mask & (1 << regno))
26289 break;
26291 gcc_assert (regno <= LAST_LO_REGNUM);
26293 reg = gen_rtx_REG (SImode, regno);
26295 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26297 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26298 stack_pointer_rtx, reg));
26300 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26301 plus_constant (Pmode, stack_pointer_rtx,
26302 -amount));
26303 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26304 RTX_FRAME_RELATED_P (insn) = 1;
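/* For illustration, assuming a 1024-byte frame with r4 live: the sequence
   above typically becomes a constant-pool load such as "ldr r4, .LCx"
   (where the hypothetical label .LCx holds -1024) followed by
   "add sp, sp, r4", while the attached REG_FRAME_RELATED_EXPR note tells
   the unwinder that the net effect is simply "sp = sp - 1024".  */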
26308 if (frame_pointer_needed)
26309 thumb_set_frame_pointer (offsets);
26311 /* If we are profiling, make sure no instructions are scheduled before
26312 the call to mcount. Similarly if the user has requested no
26313 scheduling in the prologue. Similarly if we want non-call exceptions
26314 using the EABI unwinder, to prevent faulting instructions from being
26315 swapped with a stack adjustment. */
26316 if (crtl->profile || !TARGET_SCHED_PROLOG
26317 || (arm_except_unwind_info (&global_options) == UI_TARGET
26318 && cfun->can_throw_non_call_exceptions))
26319 emit_insn (gen_blockage ());
26321 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26322 if (live_regs_mask & 0xff)
26323 cfun->machine->lr_save_eliminated = 0;
26326 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26327 single POP instruction can be generated. LR should be replaced by PC. All
26328 the checks required are already done by USE_RETURN_INSN (). Hence,
26329 all we really need to check here is whether a single register or
26330 multiple registers are being returned. */
26331 void
26332 thumb2_expand_return (bool simple_return)
26334 int i, num_regs;
26335 unsigned long saved_regs_mask;
26336 arm_stack_offsets *offsets;
26338 offsets = arm_get_frame_offsets ();
26339 saved_regs_mask = offsets->saved_regs_mask;
26341 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26342 if (saved_regs_mask & (1 << i))
26343 num_regs++;
26345 if (!simple_return && saved_regs_mask)
26347 if (num_regs == 1)
26349 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26350 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26351 rtx addr = gen_rtx_MEM (SImode,
26352 gen_rtx_POST_INC (SImode,
26353 stack_pointer_rtx));
26354 set_mem_alias_set (addr, get_frame_alias_set ());
26355 XVECEXP (par, 0, 0) = ret_rtx;
26356 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26357 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26358 emit_jump_insn (par);
26360 else
26362 saved_regs_mask &= ~ (1 << LR_REGNUM);
26363 saved_regs_mask |= (1 << PC_REGNUM);
26364 arm_emit_multi_reg_pop (saved_regs_mask);
26367 else
26369 emit_jump_insn (simple_return_rtx);
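/* For illustration: when only one register (LR) was saved, the branch above
   emits a PARALLEL of (return) and a post-increment load of the PC from the
   stack -- effectively a Thumb-2 "pop {pc}".  With more registers saved,
   LR is replaced by PC in the mask and a normal multi-register pop performs
   the return.  */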
26373 void
26374 thumb1_expand_epilogue (void)
26376 HOST_WIDE_INT amount;
26377 arm_stack_offsets *offsets;
26378 int regno;
26380 /* Naked functions don't have epilogues. */
26381 if (IS_NAKED (arm_current_func_type ()))
26382 return;
26384 offsets = arm_get_frame_offsets ();
26385 amount = offsets->outgoing_args - offsets->saved_regs;
26387 if (frame_pointer_needed)
26389 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26390 amount = offsets->locals_base - offsets->saved_regs;
26392 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26394 gcc_assert (amount >= 0);
26395 if (amount)
26397 emit_insn (gen_blockage ());
26399 if (amount < 512)
26400 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26401 GEN_INT (amount)));
26402 else
26404 /* r3 is always free in the epilogue. */
26405 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
26407 emit_insn (gen_movsi (reg, GEN_INT (amount)));
26408 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
26412 /* Emit a USE (stack_pointer_rtx), so that
26413 the stack adjustment will not be deleted. */
26414 emit_insn (gen_force_register_use (stack_pointer_rtx));
26416 if (crtl->profile || !TARGET_SCHED_PROLOG)
26417 emit_insn (gen_blockage ());
26419 /* Emit a clobber for each register that will be restored in the epilogue,
26420 so that flow2 will get register lifetimes correct. */
26421 for (regno = 0; regno < 13; regno++)
26422 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
26423 emit_clobber (gen_rtx_REG (SImode, regno));
26425 if (! df_regs_ever_live_p (LR_REGNUM))
26426 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
26429 /* Epilogue code for APCS frame. */
26430 static void
26431 arm_expand_epilogue_apcs_frame (bool really_return)
26433 unsigned long func_type;
26434 unsigned long saved_regs_mask;
26435 int num_regs = 0;
26436 int i;
26437 int floats_from_frame = 0;
26438 arm_stack_offsets *offsets;
26440 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
26441 func_type = arm_current_func_type ();
26443 /* Get frame offsets for ARM. */
26444 offsets = arm_get_frame_offsets ();
26445 saved_regs_mask = offsets->saved_regs_mask;
26447 /* Find the offset of the floating-point save area in the frame. */
26448 floats_from_frame = offsets->saved_args - offsets->frame;
26450 /* Compute how many core registers are saved and how far away the floats are. */
26451 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26452 if (saved_regs_mask & (1 << i))
26454 num_regs++;
26455 floats_from_frame += 4;
26458 if (TARGET_HARD_FLOAT && TARGET_VFP)
26460 int start_reg;
26462 /* The offset is from IP_REGNUM. */
26463 int saved_size = arm_get_vfp_saved_size ();
26464 if (saved_size > 0)
26466 floats_from_frame += saved_size;
26467 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
26468 hard_frame_pointer_rtx,
26469 GEN_INT (-floats_from_frame)));
26472 /* Generate VFP register multi-pop. */
26473 start_reg = FIRST_VFP_REGNUM;
26475 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
26476 /* Look for a case where a reg does not need restoring. */
26477 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26478 && (!df_regs_ever_live_p (i + 1)
26479 || call_used_regs[i + 1]))
26481 if (start_reg != i)
26482 arm_emit_vfp_multi_reg_pop (start_reg,
26483 (i - start_reg) / 2,
26484 gen_rtx_REG (SImode,
26485 IP_REGNUM));
26486 start_reg = i + 2;
26489 /* Restore the remaining regs that we have discovered (or possibly
26490 even all of them, if the conditional in the for loop never
26491 fired). */
26492 if (start_reg != i)
26493 arm_emit_vfp_multi_reg_pop (start_reg,
26494 (i - start_reg) / 2,
26495 gen_rtx_REG (SImode, IP_REGNUM));
26498 if (TARGET_IWMMXT)
26500 /* The frame pointer is guaranteed to be non-double-word aligned, as
26501 it is set to double-word-aligned old_stack_pointer - 4. */
26502 rtx insn;
26503 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
26505 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
26506 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26508 rtx addr = gen_frame_mem (V2SImode,
26509 plus_constant (Pmode, hard_frame_pointer_rtx,
26510 - lrm_count * 4));
26511 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26512 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26513 gen_rtx_REG (V2SImode, i),
26514 NULL_RTX);
26515 lrm_count += 2;
26519 /* saved_regs_mask should contain IP, which holds the old stack pointer
26520 from the time the activation record was created. Since SP and IP are
26521 adjacent registers, we can restore the value directly into SP. */
26522 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
26523 saved_regs_mask &= ~(1 << IP_REGNUM);
26524 saved_regs_mask |= (1 << SP_REGNUM);
26526 /* There are two registers left in saved_regs_mask - LR and PC. We
26527 only need to restore LR (the return address), but to
26528 save time we can load it directly into PC, unless we need a
26529 special function exit sequence, or we are not really returning. */
26530 if (really_return
26531 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
26532 && !crtl->calls_eh_return)
26533 /* Delete LR from the register mask, so that LR on
26534 the stack is loaded into the PC in the register mask. */
26535 saved_regs_mask &= ~(1 << LR_REGNUM);
26536 else
26537 saved_regs_mask &= ~(1 << PC_REGNUM);
26539 num_regs = bit_count (saved_regs_mask);
26540 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
26542 emit_insn (gen_blockage ());
26543 /* Unwind the stack to just below the saved registers. */
26544 emit_insn (gen_addsi3 (stack_pointer_rtx,
26545 hard_frame_pointer_rtx,
26546 GEN_INT (- 4 * num_regs)));
26549 arm_emit_multi_reg_pop (saved_regs_mask);
26551 if (IS_INTERRUPT (func_type))
26553 /* Interrupt handlers will have pushed the
26554 IP onto the stack, so restore it now. */
26555 rtx insn;
26556 rtx addr = gen_rtx_MEM (SImode,
26557 gen_rtx_POST_INC (SImode,
26558 stack_pointer_rtx));
26559 set_mem_alias_set (addr, get_frame_alias_set ());
26560 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
26561 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26562 gen_rtx_REG (SImode, IP_REGNUM),
26563 NULL_RTX);
26566 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
26567 return;
26569 if (crtl->calls_eh_return)
26570 emit_insn (gen_addsi3 (stack_pointer_rtx,
26571 stack_pointer_rtx,
26572 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
26574 if (IS_STACKALIGN (func_type))
26575 /* Restore the original stack pointer. Before the prologue, the stack was
26576 realigned and the original stack pointer was saved in r0. For details,
26577 see the comment in arm_expand_prologue. */
26578 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
26580 emit_jump_insn (simple_return_rtx);
26583 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26584 function is not a sibcall. */
26585 void
26586 arm_expand_epilogue (bool really_return)
26588 unsigned long func_type;
26589 unsigned long saved_regs_mask;
26590 int num_regs = 0;
26591 int i;
26592 int amount;
26593 arm_stack_offsets *offsets;
26595 func_type = arm_current_func_type ();
26597 /* Naked functions don't have epilogues. Hence, generate the return pattern
26598 and let output_return_instruction take care of instruction emission, if any. */
26599 if (IS_NAKED (func_type)
26600 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
26602 if (really_return)
26603 emit_jump_insn (simple_return_rtx);
26604 return;
26607 /* If we are throwing an exception, then we really must be doing a
26608 return, so we can't tail-call. */
26609 gcc_assert (!crtl->calls_eh_return || really_return);
26611 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
26613 arm_expand_epilogue_apcs_frame (really_return);
26614 return;
26617 /* Get frame offsets for ARM. */
26618 offsets = arm_get_frame_offsets ();
26619 saved_regs_mask = offsets->saved_regs_mask;
26620 num_regs = bit_count (saved_regs_mask);
26622 if (frame_pointer_needed)
26624 rtx insn;
26625 /* Restore stack pointer if necessary. */
26626 if (TARGET_ARM)
26628 /* In ARM mode, the frame pointer points to the first saved register.
26629 Restore the stack pointer to the last saved register. */
26630 amount = offsets->frame - offsets->saved_regs;
26632 /* Force out any pending memory operations that reference stacked data
26633 before stack de-allocation occurs. */
26634 emit_insn (gen_blockage ());
26635 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26636 hard_frame_pointer_rtx,
26637 GEN_INT (amount)));
26638 arm_add_cfa_adjust_cfa_note (insn, amount,
26639 stack_pointer_rtx,
26640 hard_frame_pointer_rtx);
26642 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26643 deleted. */
26644 emit_insn (gen_force_register_use (stack_pointer_rtx));
26646 else
26648 /* In Thumb-2 mode, the frame pointer points to the last saved
26649 register. */
26650 amount = offsets->locals_base - offsets->saved_regs;
26651 if (amount)
26653 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
26654 hard_frame_pointer_rtx,
26655 GEN_INT (amount)));
26656 arm_add_cfa_adjust_cfa_note (insn, amount,
26657 hard_frame_pointer_rtx,
26658 hard_frame_pointer_rtx);
26661 /* Force out any pending memory operations that reference stacked data
26662 before stack de-allocation occurs. */
26663 emit_insn (gen_blockage ());
26664 insn = emit_insn (gen_movsi (stack_pointer_rtx,
26665 hard_frame_pointer_rtx));
26666 arm_add_cfa_adjust_cfa_note (insn, 0,
26667 stack_pointer_rtx,
26668 hard_frame_pointer_rtx);
26669 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26670 deleted. */
26671 emit_insn (gen_force_register_use (stack_pointer_rtx));
26674 else
26676 /* Pop off outgoing args and local frame to adjust stack pointer to
26677 last saved register. */
26678 amount = offsets->outgoing_args - offsets->saved_regs;
26679 if (amount)
26681 rtx tmp;
26682 /* Force out any pending memory operations that reference stacked data
26683 before stack de-allocation occurs. */
26684 emit_insn (gen_blockage ());
26685 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26686 stack_pointer_rtx,
26687 GEN_INT (amount)));
26688 arm_add_cfa_adjust_cfa_note (tmp, amount,
26689 stack_pointer_rtx, stack_pointer_rtx);
26690 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26691 not deleted. */
26692 emit_insn (gen_force_register_use (stack_pointer_rtx));
26696 if (TARGET_HARD_FLOAT && TARGET_VFP)
26698 /* Generate VFP register multi-pop. */
26699 int end_reg = LAST_VFP_REGNUM + 1;
26701 /* Scan the registers in reverse order. We need to match
26702 any groupings made in the prologue and generate matching
26703 vldm operations. The need to match groups is because,
26704 unlike pop, vldm can only do consecutive regs. */
26705 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
26706 /* Look for a case where a reg does not need restoring. */
26707 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26708 && (!df_regs_ever_live_p (i + 1)
26709 || call_used_regs[i + 1]))
26711 /* Restore the regs discovered so far (from reg+2 to
26712 end_reg). */
26713 if (end_reg > i + 2)
26714 arm_emit_vfp_multi_reg_pop (i + 2,
26715 (end_reg - (i + 2)) / 2,
26716 stack_pointer_rtx);
26717 end_reg = i;
26720 /* Restore the remaining regs that we have discovered (or possibly
26721 even all of them, if the conditional in the for loop never
26722 fired). */
26723 if (end_reg > i + 2)
26724 arm_emit_vfp_multi_reg_pop (i + 2,
26725 (end_reg - (i + 2)) / 2,
26726 stack_pointer_rtx);
26729 if (TARGET_IWMMXT)
26730 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26731 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26733 rtx insn;
26734 rtx addr = gen_rtx_MEM (V2SImode,
26735 gen_rtx_POST_INC (SImode,
26736 stack_pointer_rtx));
26737 set_mem_alias_set (addr, get_frame_alias_set ());
26738 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26739 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26740 gen_rtx_REG (V2SImode, i),
26741 NULL_RTX);
26742 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26743 stack_pointer_rtx, stack_pointer_rtx);
26746 if (saved_regs_mask)
26748 rtx insn;
26749 bool return_in_pc = false;
26751 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26752 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26753 && !IS_STACKALIGN (func_type)
26754 && really_return
26755 && crtl->args.pretend_args_size == 0
26756 && saved_regs_mask & (1 << LR_REGNUM)
26757 && !crtl->calls_eh_return)
26759 saved_regs_mask &= ~(1 << LR_REGNUM);
26760 saved_regs_mask |= (1 << PC_REGNUM);
26761 return_in_pc = true;
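/* When only a single register needs restoring, use individual
   post-increment loads rather than a multi-register pop; a load
   straight into PC doubles as the function return.  */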
26764 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26766 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26767 if (saved_regs_mask & (1 << i))
26769 rtx addr = gen_rtx_MEM (SImode,
26770 gen_rtx_POST_INC (SImode,
26771 stack_pointer_rtx));
26772 set_mem_alias_set (addr, get_frame_alias_set ());
26774 if (i == PC_REGNUM)
26776 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26777 XVECEXP (insn, 0, 0) = ret_rtx;
26778 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
26779 gen_rtx_REG (SImode, i),
26780 addr);
26781 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26782 insn = emit_jump_insn (insn);
26784 else
26786 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26787 addr));
26788 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26789 gen_rtx_REG (SImode, i),
26790 NULL_RTX);
26791 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26792 stack_pointer_rtx,
26793 stack_pointer_rtx);
26797 else
26799 if (TARGET_LDRD
26800 && current_tune->prefer_ldrd_strd
26801 && !optimize_function_for_size_p (cfun))
26803 if (TARGET_THUMB2)
26804 thumb2_emit_ldrd_pop (saved_regs_mask);
26805 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26806 arm_emit_ldrd_pop (saved_regs_mask);
26807 else
26808 arm_emit_multi_reg_pop (saved_regs_mask);
26810 else
26811 arm_emit_multi_reg_pop (saved_regs_mask);
26814 if (return_in_pc == true)
26815 return;
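/* Undo the space reserved for pretend (anonymous varargs) arguments
   that the prologue pushed below the saved registers, and attach
   REG_CFA_RESTORE notes for the restored argument registers.  */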
26818 if (crtl->args.pretend_args_size)
26820 int i, j;
26821 rtx dwarf = NULL_RTX;
26822 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26823 stack_pointer_rtx,
26824 GEN_INT (crtl->args.pretend_args_size)));
26826 RTX_FRAME_RELATED_P (tmp) = 1;
26828 if (cfun->machine->uses_anonymous_args)
26830 /* Restore pretend args. See arm_expand_prologue for how the
26831 pretend args are saved on the stack. */
26832 int num_regs = crtl->args.pretend_args_size / 4;
26833 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26834 for (j = 0, i = 0; j < num_regs; i++)
26835 if (saved_regs_mask & (1 << i))
26837 rtx reg = gen_rtx_REG (SImode, i);
26838 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26839 j++;
26841 REG_NOTES (tmp) = dwarf;
26843 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
26844 stack_pointer_rtx, stack_pointer_rtx);
26847 if (!really_return)
26848 return;
26850 if (crtl->calls_eh_return)
26851 emit_insn (gen_addsi3 (stack_pointer_rtx,
26852 stack_pointer_rtx,
26853 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26855 if (IS_STACKALIGN (func_type))
26856 /* Restore the original stack pointer. Before prologue, the stack was
26857 realigned and the original stack pointer saved in r0. For details,
26858 see comment in arm_expand_prologue. */
26859 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
26861 emit_jump_insn (simple_return_rtx);
26864 /* Implementation of insn prologue_thumb1_interwork. This is the first
26865 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26867 const char *
26868 thumb1_output_interwork (void)
26870 const char * name;
26871 FILE *f = asm_out_file;
26873 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26874 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26875 == SYMBOL_REF);
26876 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26878 /* Generate code sequence to switch us into Thumb mode. */
26879 /* The .code 32 directive has already been emitted by
26880 ASM_DECLARE_FUNCTION_NAME. */
26881 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26882 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26884 /* Generate a label, so that the debugger will notice the
26885 change in instruction sets. This label is also used by
26886 the assembler to bypass the ARM code when this function
26887 is called from a Thumb encoded function elsewhere in the
26888 same file. Hence the definition of STUB_NAME here must
26889 agree with the definition in gas/config/tc-arm.c. */
26891 #define STUB_NAME ".real_start_of"
26893 fprintf (f, "\t.code\t16\n");
26894 #ifdef ARM_PE
26895 if (arm_dllexport_name_p (name))
26896 name = arm_strip_name_encoding (name);
26897 #endif
26898 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26899 fprintf (f, "\t.thumb_func\n");
26900 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26902 return "";
26905 /* Handle the case of a double word load into a low register from
26906 a computed memory address. The computed address may involve a
26907 register which is overwritten by the load. */
26908 const char *
26909 thumb_load_double_from_address (rtx *operands)
26911 rtx addr;
26912 rtx base;
26913 rtx offset;
26914 rtx arg1;
26915 rtx arg2;
26917 gcc_assert (REG_P (operands[0]));
26918 gcc_assert (MEM_P (operands[1]));
26920 /* Get the memory address. */
26921 addr = XEXP (operands[1], 0);
26923 /* Work out how the memory address is computed. */
26924 switch (GET_CODE (addr))
26926 case REG:
26927 operands[2] = adjust_address (operands[1], SImode, 4);
26929 if (REGNO (operands[0]) == REGNO (addr))
26931 output_asm_insn ("ldr\t%H0, %2", operands);
26932 output_asm_insn ("ldr\t%0, %1", operands);
26934 else
26936 output_asm_insn ("ldr\t%0, %1", operands);
26937 output_asm_insn ("ldr\t%H0, %2", operands);
26939 break;
26941 case CONST:
26942 /* Compute <address> + 4 for the high order load. */
26943 operands[2] = adjust_address (operands[1], SImode, 4);
26945 output_asm_insn ("ldr\t%0, %1", operands);
26946 output_asm_insn ("ldr\t%H0, %2", operands);
26947 break;
26949 case PLUS:
26950 arg1 = XEXP (addr, 0);
26951 arg2 = XEXP (addr, 1);
26953 if (CONSTANT_P (arg1))
26954 base = arg2, offset = arg1;
26955 else
26956 base = arg1, offset = arg2;
26958 gcc_assert (REG_P (base));
26960 /* Catch the case of <address> = <reg> + <reg> */
26961 if (REG_P (offset))
26963 int reg_offset = REGNO (offset);
26964 int reg_base = REGNO (base);
26965 int reg_dest = REGNO (operands[0]);
26967 /* Add the base and offset registers together into the
26968 higher destination register. */
26969 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26970 reg_dest + 1, reg_base, reg_offset);
26972 /* Load the lower destination register from the address in
26973 the higher destination register. */
26974 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26975 reg_dest, reg_dest + 1);
26977 /* Load the higher destination register from its own address
26978 plus 4. */
26979 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26980 reg_dest + 1, reg_dest + 1);
26982 else
26984 /* Compute <address> + 4 for the high order load. */
26985 operands[2] = adjust_address (operands[1], SImode, 4);
26987 /* If the computed address is held in the low order register
26988 then load the high order register first, otherwise always
26989 load the low order register first. */
26990 if (REGNO (operands[0]) == REGNO (base))
26992 output_asm_insn ("ldr\t%H0, %2", operands);
26993 output_asm_insn ("ldr\t%0, %1", operands);
26995 else
26997 output_asm_insn ("ldr\t%0, %1", operands);
26998 output_asm_insn ("ldr\t%H0, %2", operands);
27001 break;
27003 case LABEL_REF:
27004 /* With no registers to worry about we can just load the value
27005 directly. */
27006 operands[2] = adjust_address (operands[1], SImode, 4);
27008 output_asm_insn ("ldr\t%H0, %2", operands);
27009 output_asm_insn ("ldr\t%0, %1", operands);
27010 break;
27012 default:
27013 gcc_unreachable ();
27016 return "";
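/* Output the ldmia/stmia pair used to copy N words of memory.
   Operands 4..6 hold the scratch registers; they are first sorted
   into ascending order, as ldm/stm register lists expect.  */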
27019 const char *
27020 thumb_output_move_mem_multiple (int n, rtx *operands)
27022 rtx tmp;
27024 switch (n)
27026 case 2:
27027 if (REGNO (operands[4]) > REGNO (operands[5]))
27029 tmp = operands[4];
27030 operands[4] = operands[5];
27031 operands[5] = tmp;
27033 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27034 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27035 break;
27037 case 3:
27038 if (REGNO (operands[4]) > REGNO (operands[5]))
27040 tmp = operands[4];
27041 operands[4] = operands[5];
27042 operands[5] = tmp;
27044 if (REGNO (operands[5]) > REGNO (operands[6]))
27046 tmp = operands[5];
27047 operands[5] = operands[6];
27048 operands[6] = tmp;
27050 if (REGNO (operands[4]) > REGNO (operands[5]))
27052 tmp = operands[4];
27053 operands[4] = operands[5];
27054 operands[5] = tmp;
27057 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27058 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27059 break;
27061 default:
27062 gcc_unreachable ();
27065 return "";
27068 /* Output a call-via instruction for thumb state. */
27069 const char *
27070 thumb_call_via_reg (rtx reg)
27072 int regno = REGNO (reg);
27073 rtx *labelp;
27075 gcc_assert (regno < LR_REGNUM);
27077 /* If we are in the normal text section we can use a single instance
27078 per compilation unit. If we are doing function sections, then we need
27079 an entry per section, since we can't rely on reachability. */
27080 if (in_section == text_section)
27082 thumb_call_reg_needed = 1;
27084 if (thumb_call_via_label[regno] == NULL)
27085 thumb_call_via_label[regno] = gen_label_rtx ();
27086 labelp = thumb_call_via_label + regno;
27088 else
27090 if (cfun->machine->call_via[regno] == NULL)
27091 cfun->machine->call_via[regno] = gen_label_rtx ();
27092 labelp = cfun->machine->call_via + regno;
27095 output_asm_insn ("bl\t%a0", labelp);
27096 return "";
27099 /* Routines for generating rtl. */
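/* Expand a block copy of INTVAL (operands[2]) bytes from operands[1]
   to operands[0]: 12- and 8-byte chunks go through the movmem12b and
   movmem8b patterns, and the remainder is copied with word,
   half-word and byte moves.  */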
27100 void
27101 thumb_expand_movmemqi (rtx *operands)
27103 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27104 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27105 HOST_WIDE_INT len = INTVAL (operands[2]);
27106 HOST_WIDE_INT offset = 0;
27108 while (len >= 12)
27110 emit_insn (gen_movmem12b (out, in, out, in));
27111 len -= 12;
27114 if (len >= 8)
27116 emit_insn (gen_movmem8b (out, in, out, in));
27117 len -= 8;
27120 if (len >= 4)
27122 rtx reg = gen_reg_rtx (SImode);
27123 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27124 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27125 len -= 4;
27126 offset += 4;
27129 if (len >= 2)
27131 rtx reg = gen_reg_rtx (HImode);
27132 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27133 plus_constant (Pmode, in,
27134 offset))));
27135 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27136 offset)),
27137 reg));
27138 len -= 2;
27139 offset += 2;
27142 if (len)
27144 rtx reg = gen_reg_rtx (QImode);
27145 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27146 plus_constant (Pmode, in,
27147 offset))));
27148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27149 offset)),
27150 reg));
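/* Handle writing a half-word to memory during reload.  */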
27154 void
27155 thumb_reload_out_hi (rtx *operands)
27157 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27160 /* Handle reading a half-word from memory during reload. */
27161 void
27162 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27164 gcc_unreachable ();
27167 /* Return the length of a function name prefix
27168 that starts with the character 'c'. */
27169 static int
27170 arm_get_strip_length (int c)
27172 switch (c)
27174 ARM_NAME_ENCODING_LENGTHS
27175 default: return 0;
27179 /* Return a pointer to a function's name with any
27180 and all prefix encodings stripped from it. */
27181 const char *
27182 arm_strip_name_encoding (const char *name)
27184 int skip;
27186 while ((skip = arm_get_strip_length (* name)))
27187 name += skip;
27189 return name;
27192 /* If there is a '*' anywhere in the name's prefix, then
27193 emit the stripped name verbatim, otherwise prepend an
27194 underscore if leading underscores are being used. */
27195 void
27196 arm_asm_output_labelref (FILE *stream, const char *name)
27198 int skip;
27199 int verbatim = 0;
27201 while ((skip = arm_get_strip_length (* name)))
27203 verbatim |= (*name == '*');
27204 name += skip;
27207 if (verbatim)
27208 fputs (name, stream);
27209 else
27210 asm_fprintf (stream, "%U%s", name);
27213 /* This function is used to emit an EABI tag and its associated value.
27214 We emit the numerical value of the tag in case the assembler does not
27215 support textual tags (e.g. gas prior to 2.20). If requested we include
27216 the tag name in a comment so that anyone reading the assembler output
27217 will know which tag is being set.
27219 This function is not static because arm-c.c needs it too. */
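/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits something like:
       .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
   where the trailing tag-name comment is only present under
   -fverbose-asm or -dA.  */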
27221 void
27222 arm_emit_eabi_attribute (const char *name, int num, int val)
27224 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27225 if (flag_verbose_asm || flag_debug_asm)
27226 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27227 asm_fprintf (asm_out_file, "\n");
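/* Emit the per-file assembler preamble: the .syntax, .arch/.cpu and
   .fpu directives, plus the EABI build attributes describing the
   floating-point and ABI configuration in use.  */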
27230 static void
27231 arm_file_start (void)
27233 int val;
27235 if (TARGET_UNIFIED_ASM)
27236 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27238 if (TARGET_BPABI)
27240 const char *fpu_name;
27241 if (arm_selected_arch)
27242 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27243 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27244 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27245 else
27246 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
27248 if (TARGET_SOFT_FLOAT)
27250 fpu_name = "softvfp";
27252 else
27254 fpu_name = arm_fpu_desc->name;
27255 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27257 if (TARGET_HARD_FLOAT)
27258 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27259 if (TARGET_HARD_FLOAT_ABI)
27260 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27263 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27265 /* Some of these attributes only apply when the corresponding features
27266 are used. However we don't have any easy way of figuring this out.
27267 Conservatively record the setting that would have been used. */
27269 if (flag_rounding_math)
27270 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27272 if (!flag_unsafe_math_optimizations)
27274 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27275 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27277 if (flag_signaling_nans)
27278 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27280 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27281 flag_finite_math_only ? 1 : 3);
27283 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27284 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27285 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27286 flag_short_enums ? 1 : 2);
27288 /* Tag_ABI_optimization_goals. */
27289 if (optimize_size)
27290 val = 4;
27291 else if (optimize >= 2)
27292 val = 2;
27293 else if (optimize)
27294 val = 1;
27295 else
27296 val = 6;
27297 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27299 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27300 unaligned_access);
27302 if (arm_fp16_format)
27303 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27304 (int) arm_fp16_format);
27306 if (arm_lang_output_object_attributes_hook)
27307 arm_lang_output_object_attributes_hook();
27310 default_file_start ();
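/* Emit the end-of-file assembly: a .note.GNU-stack section when
   required, plus the shared "bx rN" stubs that back
   thumb_call_via_reg for the text section.  */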
27313 static void
27314 arm_file_end (void)
27316 int regno;
27318 if (NEED_INDICATE_EXEC_STACK)
27319 /* Add .note.GNU-stack. */
27320 file_end_indicate_exec_stack ();
27322 if (! thumb_call_reg_needed)
27323 return;
27325 switch_to_section (text_section);
27326 asm_fprintf (asm_out_file, "\t.code 16\n");
27327 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27329 for (regno = 0; regno < LR_REGNUM; regno++)
27331 rtx label = thumb_call_via_label[regno];
27333 if (label != 0)
27335 targetm.asm_out.internal_label (asm_out_file, "L",
27336 CODE_LABEL_NUMBER (label));
27337 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27342 #ifndef ARM_PE
27343 /* Symbols in the text segment can be accessed without indirecting via the
27344 constant pool; it may take an extra binary operation, but this is still
27345 faster than indirecting via memory. Don't do this when not optimizing,
27346 since we won't be calculating all of the offsets necessary to do this
27347 simplification. */
27349 static void
27350 arm_encode_section_info (tree decl, rtx rtl, int first)
27352 if (optimize > 0 && TREE_CONSTANT (decl))
27353 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27355 default_encode_section_info (decl, rtl, first);
27357 #endif /* !ARM_PE */
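/* Output an internal label, resetting the final-pass conditional
   execution state machine (arm_ccfsm_state) when this label is the
   branch target it was tracking.  */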
27359 static void
27360 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27362 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27363 && !strcmp (prefix, "L"))
27365 arm_ccfsm_state = 0;
27366 arm_target_insn = NULL;
27368 default_internal_label (stream, prefix, labelno);
27371 /* Output code to add DELTA to the first argument, and then jump
27372 to FUNCTION. Used for C++ multiple inheritance. */
27373 static void
27374 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
27375 HOST_WIDE_INT delta,
27376 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
27377 tree function)
27379 static int thunk_label = 0;
27380 char label[256];
27381 char labelpc[256];
27382 int mi_delta = delta;
27383 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27384 int shift = 0;
27385 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27386 ? 1 : 0);
27387 if (mi_delta < 0)
27388 mi_delta = - mi_delta;
27390 final_start_function (emit_barrier (), file, 1);
27392 if (TARGET_THUMB1)
27394 int labelno = thunk_label++;
27395 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27396 /* Thunks are entered in ARM mode when available. */
27397 if (TARGET_THUMB1_ONLY)
27399 /* push r3 so we can use it as a temporary. */
27400 /* TODO: Omit this save if r3 is not used. */
27401 fputs ("\tpush {r3}\n", file);
27402 fputs ("\tldr\tr3, ", file);
27404 else
27406 fputs ("\tldr\tr12, ", file);
27408 assemble_name (file, label);
27409 fputc ('\n', file);
27410 if (flag_pic)
27412 /* If we are generating PIC, the ldr instruction below loads
27413 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27414 the address of the add + 8, so we have:
27416 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27417 = target + 1.
27419 Note that we have "+ 1" because some versions of GNU ld
27420 don't set the low bit of the result for R_ARM_REL32
27421 relocations against thumb function symbols.
27422 On ARMv6M this is +4, not +8. */
27423 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
27424 assemble_name (file, labelpc);
27425 fputs (":\n", file);
27426 if (TARGET_THUMB1_ONLY)
27428 /* This is 2 insns after the start of the thunk, so we know it
27429 is 4-byte aligned. */
27430 fputs ("\tadd\tr3, pc, r3\n", file);
27431 fputs ("\tmov r12, r3\n", file);
27433 else
27434 fputs ("\tadd\tr12, pc, r12\n", file);
27436 else if (TARGET_THUMB1_ONLY)
27437 fputs ("\tmov r12, r3\n", file);
27439 if (TARGET_THUMB1_ONLY)
27441 if (mi_delta > 255)
27443 fputs ("\tldr\tr3, ", file);
27444 assemble_name (file, label);
27445 fputs ("+4\n", file);
27446 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
27447 mi_op, this_regno, this_regno);
27449 else if (mi_delta != 0)
27451 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27452 mi_op, this_regno, this_regno,
27453 mi_delta);
27456 else
27458 /* TODO: Use movw/movt for large constants when available. */
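/* Split the delta into add/sub instructions whose immediates are
   8-bit values positioned at even bit offsets, matching the ARM
   immediate encoding.  */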
27459 while (mi_delta != 0)
27461 if ((mi_delta & (3 << shift)) == 0)
27462 shift += 2;
27463 else
27465 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27466 mi_op, this_regno, this_regno,
27467 mi_delta & (0xff << shift));
27468 mi_delta &= ~(0xff << shift);
27469 shift += 8;
27473 if (TARGET_THUMB1)
27475 if (TARGET_THUMB1_ONLY)
27476 fputs ("\tpop\t{r3}\n", file);
27478 fprintf (file, "\tbx\tr12\n");
27479 ASM_OUTPUT_ALIGN (file, 2);
27480 assemble_name (file, label);
27481 fputs (":\n", file);
27482 if (flag_pic)
27484 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
27485 rtx tem = XEXP (DECL_RTL (function), 0);
27486 tem = plus_constant (GET_MODE (tem), tem, -7);
27487 tem = gen_rtx_MINUS (GET_MODE (tem),
27488 tem,
27489 gen_rtx_SYMBOL_REF (Pmode,
27490 ggc_strdup (labelpc)));
27491 assemble_integer (tem, 4, BITS_PER_WORD, 1);
27493 else
27494 /* Output ".word .LTHUNKn". */
27495 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
27497 if (TARGET_THUMB1_ONLY && mi_delta > 255)
27498 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
27500 else
27502 fputs ("\tb\t", file);
27503 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
27504 if (NEED_PLT_RELOC)
27505 fputs ("(PLT)", file);
27506 fputc ('\n', file);
27509 final_end_function ();
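/* Output a CONST_VECTOR as a single hexadecimal constant, printing
   the elements from the highest-numbered one down.  Returns 1 on
   success.  */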
27512 int
27513 arm_emit_vector_const (FILE *file, rtx x)
27515 int i;
27516 const char * pattern;
27518 gcc_assert (GET_CODE (x) == CONST_VECTOR);
27520 switch (GET_MODE (x))
27522 case V2SImode: pattern = "%08x"; break;
27523 case V4HImode: pattern = "%04x"; break;
27524 case V8QImode: pattern = "%02x"; break;
27525 default: gcc_unreachable ();
27528 fprintf (file, "0x");
27529 for (i = CONST_VECTOR_NUNITS (x); i--;)
27531 rtx element;
27533 element = CONST_VECTOR_ELT (x, i);
27534 fprintf (file, pattern, INTVAL (element));
27537 return 1;
27540 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27541 HFmode constant pool entries are actually loaded with ldr. */
27542 void
27543 arm_emit_fp16_const (rtx c)
27545 REAL_VALUE_TYPE r;
27546 long bits;
27548 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
27549 bits = real_to_target (NULL, &r, HFmode);
27550 if (WORDS_BIG_ENDIAN)
27551 assemble_zeros (2);
27552 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27553 if (!WORDS_BIG_ENDIAN)
27554 assemble_zeros (2);
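/* Output the assembly for loading an iWMMXt GR register.  Offsets
   that wldrw cannot reach directly are handled by temporarily saving
   the base register on the stack, loading the value into it, and
   transferring it to the wCGR with tmcr.  */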
27557 const char *
27558 arm_output_load_gr (rtx *operands)
27560 rtx reg;
27561 rtx offset;
27562 rtx wcgr;
27563 rtx sum;
27565 if (!MEM_P (operands [1])
27566 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27567 || !REG_P (reg = XEXP (sum, 0))
27568 || !CONST_INT_P (offset = XEXP (sum, 1))
27569 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27570 return "wldrw%?\t%0, %1";
27572 /* Fix up an out-of-range load of a GR register. */
27573 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27574 wcgr = operands[0];
27575 operands[0] = reg;
27576 output_asm_insn ("ldr%?\t%0, %1", operands);
27578 operands[0] = wcgr;
27579 operands[1] = reg;
27580 output_asm_insn ("tmcr%?\t%0, %1", operands);
27581 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27583 return "";
27586 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27588 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27589 named arg and all anonymous args onto the stack.
27590 XXX I know the prologue shouldn't be pushing registers, but it is faster
27591 that way. */
27593 static void
27594 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27595 enum machine_mode mode,
27596 tree type,
27597 int *pretend_size,
27598 int second_time ATTRIBUTE_UNUSED)
27600 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27601 int nregs;
27603 cfun->machine->uses_anonymous_args = 1;
27604 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27606 nregs = pcum->aapcs_ncrn;
27607 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
27608 nregs++;
27610 else
27611 nregs = pcum->nregs;
27613 if (nregs < NUM_ARG_REGS)
27614 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27617 /* We can't rely on the caller doing the proper promotion when
27618 using APCS or ATPCS. */
27620 static bool
27621 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27623 return !TARGET_AAPCS_BASED;
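/* Promote narrow integer arguments and return values to SImode for
   the purposes of argument passing and return.  */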
27626 static enum machine_mode
27627 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27628 enum machine_mode mode,
27629 int *punsignedp ATTRIBUTE_UNUSED,
27630 const_tree fntype ATTRIBUTE_UNUSED,
27631 int for_return ATTRIBUTE_UNUSED)
27633 if (GET_MODE_CLASS (mode) == MODE_INT
27634 && GET_MODE_SIZE (mode) < 4)
27635 return SImode;
27637 return mode;
27640 /* AAPCS based ABIs use short enums by default. */
27642 static bool
27643 arm_default_short_enums (void)
27645 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
27649 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27651 static bool
27652 arm_align_anon_bitfield (void)
27654 return TARGET_AAPCS_BASED;
27658 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27660 static tree
27661 arm_cxx_guard_type (void)
27663 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27667 /* The EABI says test the least significant bit of a guard variable. */
27669 static bool
27670 arm_cxx_guard_mask_bit (void)
27672 return TARGET_AAPCS_BASED;
27676 /* The EABI specifies that all array cookies are 8 bytes long. */
27678 static tree
27679 arm_get_cookie_size (tree type)
27681 tree size;
27683 if (!TARGET_AAPCS_BASED)
27684 return default_cxx_get_cookie_size (type);
27686 size = build_int_cst (sizetype, 8);
27687 return size;
27691 /* The EABI says that array cookies should also contain the element size. */
27693 static bool
27694 arm_cookie_has_size (void)
27696 return TARGET_AAPCS_BASED;
27700 /* The EABI says constructors and destructors should return a pointer to
27701 the object constructed/destroyed. */
27703 static bool
27704 arm_cxx_cdtor_returns_this (void)
27706 return TARGET_AAPCS_BASED;
27709 /* The EABI says that an inline function may never be the key
27710 method. */
27712 static bool
27713 arm_cxx_key_method_may_be_inline (void)
27715 return !TARGET_AAPCS_BASED;
27718 static void
27719 arm_cxx_determine_class_data_visibility (tree decl)
27721 if (!TARGET_AAPCS_BASED
27722 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27723 return;
27725 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27726 is exported. However, on systems without dynamic vague linkage,
27727 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27728 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27729 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27730 else
27731 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27732 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27735 static bool
27736 arm_cxx_class_data_always_comdat (void)
27738 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27739 vague linkage if the class has no key function. */
27740 return !TARGET_AAPCS_BASED;
27744 /* The EABI says __aeabi_atexit should be used to register static
27745 destructors. */
27747 static bool
27748 arm_cxx_use_aeabi_atexit (void)
27750 return TARGET_AAPCS_BASED;
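/* Store SOURCE as the return address of the current function.  If LR
   was not saved by the prologue, just move SOURCE into LR; otherwise
   write it into LR's stack slot, using SCRATCH to form addresses that
   the immediate offset range cannot express.  */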
27754 void
27755 arm_set_return_address (rtx source, rtx scratch)
27757 arm_stack_offsets *offsets;
27758 HOST_WIDE_INT delta;
27759 rtx addr;
27760 unsigned long saved_regs;
27762 offsets = arm_get_frame_offsets ();
27763 saved_regs = offsets->saved_regs_mask;
27765 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27766 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27767 else
27769 if (frame_pointer_needed)
27770 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27771 else
27773 /* LR will be the first saved register. */
27774 delta = offsets->outgoing_args - (offsets->frame + 4);
27777 if (delta >= 4096)
27779 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27780 GEN_INT (delta & ~4095)));
27781 addr = scratch;
27782 delta &= 4095;
27784 else
27785 addr = stack_pointer_rtx;
27787 addr = plus_constant (Pmode, addr, delta);
27789 emit_move_insn (gen_frame_mem (Pmode, addr), source);
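/* Thumb counterpart of arm_set_return_address: write SOURCE into the
   saved LR slot (or straight into LR when it is not stacked),
   respecting the smaller Thumb addressing offsets.  */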
27794 void
27795 thumb_set_return_address (rtx source, rtx scratch)
27797 arm_stack_offsets *offsets;
27798 HOST_WIDE_INT delta;
27799 HOST_WIDE_INT limit;
27800 int reg;
27801 rtx addr;
27802 unsigned long mask;
27804 emit_use (source);
27806 offsets = arm_get_frame_offsets ();
27807 mask = offsets->saved_regs_mask;
27808 if (mask & (1 << LR_REGNUM))
27810 limit = 1024;
27811 /* Find the saved regs. */
27812 if (frame_pointer_needed)
27814 delta = offsets->soft_frame - offsets->saved_args;
27815 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27816 if (TARGET_THUMB1)
27817 limit = 128;
27819 else
27821 delta = offsets->outgoing_args - offsets->saved_args;
27822 reg = SP_REGNUM;
27824 /* Allow for the stack frame. */
27825 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27826 delta -= 16;
27827 /* The link register is always the first saved register. */
27828 delta -= 4;
27830 /* Construct the address. */
27831 addr = gen_rtx_REG (SImode, reg);
27832 if (delta > limit)
27834 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27835 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27836 addr = scratch;
27838 else
27839 addr = plus_constant (Pmode, addr, delta);
27841 emit_move_insn (gen_frame_mem (Pmode, addr), source);
27843 else
27844 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27847 /* Implements target hook vector_mode_supported_p. */
27848 bool
27849 arm_vector_mode_supported_p (enum machine_mode mode)
27851 /* Neon also supports V2SImode, etc. listed in the clause below. */
27852 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27853 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
27854 return true;
27856 if ((TARGET_NEON || TARGET_IWMMXT)
27857 && ((mode == V2SImode)
27858 || (mode == V4HImode)
27859 || (mode == V8QImode)))
27860 return true;
27862 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27863 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27864 || mode == V2HAmode))
27865 return true;
27867 return false;
27870 /* Implements target hook array_mode_supported_p. */
27872 static bool
27873 arm_array_mode_supported_p (enum machine_mode mode,
27874 unsigned HOST_WIDE_INT nelems)
27876 if (TARGET_NEON
27877 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27878 && (nelems >= 2 && nelems <= 4))
27879 return true;
27881 return false;
27884 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27885 registers when autovectorizing for Neon, at least until multiple vector
27886 widths are supported properly by the middle-end. */
27888 static enum machine_mode
27889 arm_preferred_simd_mode (enum machine_mode mode)
27891 if (TARGET_NEON)
27892 switch (mode)
27894 case SFmode:
27895 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27896 case SImode:
27897 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27898 case HImode:
27899 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27900 case QImode:
27901 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27902 case DImode:
27903 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27904 return V2DImode;
27905 break;
27907 default:;
27910 if (TARGET_REALLY_IWMMXT)
27911 switch (mode)
27913 case SImode:
27914 return V2SImode;
27915 case HImode:
27916 return V4HImode;
27917 case QImode:
27918 return V8QImode;
27920 default:;
27923 return word_mode;
27926 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27928 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27929 using r0-r4 for function arguments, r7 for the stack frame and don't have
27930 enough left over to do doubleword arithmetic. For Thumb-2 all the
27931 potentially problematic instructions accept high registers so this is not
27932 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27933 that require many low registers. */
27934 static bool
27935 arm_class_likely_spilled_p (reg_class_t rclass)
27937 if ((TARGET_THUMB1 && rclass == LO_REGS)
27938 || rclass == CC_REG)
27939 return true;
27941 return false;
27944 /* Implements target hook small_register_classes_for_mode_p. */
27945 bool
27946 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
27948 return TARGET_THUMB1;
27951 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27952 ARM insns and therefore guarantee that the shift count is modulo 256.
27953 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27954 guarantee no particular behavior for out-of-range counts. */
27956 static unsigned HOST_WIDE_INT
27957 arm_shift_truncation_mask (enum machine_mode mode)
27959 return mode == SImode ? 255 : 0;
27963 /* Map internal gcc register numbers to DWARF2 register numbers. */
27965 unsigned int
27966 arm_dbx_register_number (unsigned int regno)
27968 if (regno < 16)
27969 return regno;
27971 if (IS_VFP_REGNUM (regno))
27973 /* See comment in arm_dwarf_register_span. */
27974 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27975 return 64 + regno - FIRST_VFP_REGNUM;
27976 else
27977 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27980 if (IS_IWMMXT_GR_REGNUM (regno))
27981 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27983 if (IS_IWMMXT_REGNUM (regno))
27984 return 112 + regno - FIRST_IWMMXT_REGNUM;
27986 gcc_unreachable ();
27989 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27990 GCC models them as 64 32-bit registers, so we need to describe this to
27991 the DWARF generation code. Other registers can use the default. */
27992 static rtx
27993 arm_dwarf_register_span (rtx rtl)
27995 enum machine_mode mode;
27996 unsigned regno;
27997 rtx parts[8];
27998 int nregs;
27999 int i;
28001 regno = REGNO (rtl);
28002 if (!IS_VFP_REGNUM (regno))
28003 return NULL_RTX;
28005 /* XXX FIXME: The EABI defines two VFP register ranges:
28006 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28007 256-287: D0-D31
28008 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28009 corresponding D register. Until GDB supports this, we shall use the
28010 legacy encodings. We also use these encodings for D0-D15 for
28011 compatibility with older debuggers. */
28012 mode = GET_MODE (rtl);
28013 if (GET_MODE_SIZE (mode) < 8)
28014 return NULL_RTX;
28016 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28018 nregs = GET_MODE_SIZE (mode) / 4;
28019 for (i = 0; i < nregs; i += 2)
28020 if (TARGET_BIG_END)
28022 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28023 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28025 else
28027 parts[i] = gen_rtx_REG (SImode, regno + i);
28028 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28031 else
28033 nregs = GET_MODE_SIZE (mode) / 8;
28034 for (i = 0; i < nregs; i++)
28035 parts[i] = gen_rtx_REG (DImode, regno + i);
28038 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28041 #if ARM_UNWIND_INFO
28042 /* Emit unwind directives for a store-multiple instruction or stack pointer
28043 push during alignment.
28044 These should only ever be generated by the function prologue code, so
28045 expect them to have a particular form. */
28047 static void
28048 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28050 int i;
28051 HOST_WIDE_INT offset;
28052 HOST_WIDE_INT nregs;
28053 int reg_size;
28054 unsigned reg;
28055 unsigned lastreg;
28056 rtx e;
28058 e = XVECEXP (p, 0, 0);
28059 if (GET_CODE (e) != SET)
28060 abort ();
28062 /* First insn will adjust the stack pointer. */
28063 if (GET_CODE (e) != SET
28064 || !REG_P (XEXP (e, 0))
28065 || REGNO (XEXP (e, 0)) != SP_REGNUM
28066 || GET_CODE (XEXP (e, 1)) != PLUS)
28067 abort ();
28069 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
28070 nregs = XVECLEN (p, 0) - 1;
28072 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
28073 if (reg < 16)
28075 /* The function prologue may also push pc, but not annotate it as it is
28076 never restored. We turn this into a stack pointer adjustment. */
28077 if (nregs * 4 == offset - 4)
28079 fprintf (asm_out_file, "\t.pad #4\n");
28080 offset -= 4;
28082 reg_size = 4;
28083 fprintf (asm_out_file, "\t.save {");
28085 else if (IS_VFP_REGNUM (reg))
28087 reg_size = 8;
28088 fprintf (asm_out_file, "\t.vsave {");
28090 else
28091 /* Unknown register type. */
28092 abort ();
28094 /* If the stack increment doesn't match the size of the saved registers,
28095 something has gone horribly wrong. */
28096 if (offset != nregs * reg_size)
28097 abort ();
28099 offset = 0;
28100 lastreg = 0;
28101 /* The remaining insns will describe the stores. */
28102 for (i = 1; i <= nregs; i++)
28104 /* Expect (set (mem <addr>) (reg)).
28105 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28106 e = XVECEXP (p, 0, i);
28107 if (GET_CODE (e) != SET
28108 || !MEM_P (XEXP (e, 0))
28109 || !REG_P (XEXP (e, 1)))
28110 abort ();
28112 reg = REGNO (XEXP (e, 1));
28113 if (reg < lastreg)
28114 abort ();
28116 if (i != 1)
28117 fprintf (asm_out_file, ", ");
28118 /* We can't use %r for vfp because we need to use the
28119 double precision register names. */
28120 if (IS_VFP_REGNUM (reg))
28121 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28122 else
28123 asm_fprintf (asm_out_file, "%r", reg);
28125 #ifdef ENABLE_CHECKING
28126 /* Check that the addresses are consecutive. */
28127 e = XEXP (XEXP (e, 0), 0);
28128 if (GET_CODE (e) == PLUS)
28130 offset += reg_size;
28131 if (!REG_P (XEXP (e, 0))
28132 || REGNO (XEXP (e, 0)) != SP_REGNUM
28133 || !CONST_INT_P (XEXP (e, 1))
28134 || offset != INTVAL (XEXP (e, 1)))
28135 abort ();
28137 else if (i != 1
28138 || !REG_P (e)
28139 || REGNO (e) != SP_REGNUM)
28140 abort ();
28141 #endif
28143 fprintf (asm_out_file, "}\n");
28146 /* Emit unwind directives for a SET. */
28148 static void
28149 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28151 rtx e0;
28152 rtx e1;
28153 unsigned reg;
28155 e0 = XEXP (p, 0);
28156 e1 = XEXP (p, 1);
28157 switch (GET_CODE (e0))
28159 case MEM:
28160 /* Pushing a single register. */
28161 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28162 || !REG_P (XEXP (XEXP (e0, 0), 0))
28163 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28164 abort ();
28166 asm_fprintf (asm_out_file, "\t.save ");
28167 if (IS_VFP_REGNUM (REGNO (e1)))
28168 asm_fprintf(asm_out_file, "{d%d}\n",
28169 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28170 else
28171 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28172 break;
28174 case REG:
28175 if (REGNO (e0) == SP_REGNUM)
28177 /* A stack increment. */
28178 if (GET_CODE (e1) != PLUS
28179 || !REG_P (XEXP (e1, 0))
28180 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28181 || !CONST_INT_P (XEXP (e1, 1)))
28182 abort ();
28184 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28185 -INTVAL (XEXP (e1, 1)));
28187 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28189 HOST_WIDE_INT offset;
28191 if (GET_CODE (e1) == PLUS)
28193 if (!REG_P (XEXP (e1, 0))
28194 || !CONST_INT_P (XEXP (e1, 1)))
28195 abort ();
28196 reg = REGNO (XEXP (e1, 0));
28197 offset = INTVAL (XEXP (e1, 1));
28198 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28199 HARD_FRAME_POINTER_REGNUM, reg,
28200 offset);
28202 else if (REG_P (e1))
28204 reg = REGNO (e1);
28205 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28206 HARD_FRAME_POINTER_REGNUM, reg);
28208 else
28209 abort ();
28211 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28213 /* Move from sp to reg. */
28214 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28216 else if (GET_CODE (e1) == PLUS
28217 && REG_P (XEXP (e1, 0))
28218 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28219 && CONST_INT_P (XEXP (e1, 1)))
28221 /* Set reg to offset from sp. */
28222 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28223 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28225 else
28226 abort ();
28227 break;
28229 default:
28230 abort ();
28235 /* Emit unwind directives for the given insn. */
28237 static void
28238 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28240 rtx note, pat;
28241 bool handled_one = false;
28243 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28244 return;
28246 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28247 && (TREE_NOTHROW (current_function_decl)
28248 || crtl->all_throwers_are_sibcalls))
28249 return;
28251 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28252 return;
28254 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28256 switch (REG_NOTE_KIND (note))
28258 case REG_FRAME_RELATED_EXPR:
28259 pat = XEXP (note, 0);
28260 goto found;
28262 case REG_CFA_REGISTER:
28263 pat = XEXP (note, 0);
28264 if (pat == NULL)
28266 pat = PATTERN (insn);
28267 if (GET_CODE (pat) == PARALLEL)
28268 pat = XVECEXP (pat, 0, 0);
28271 /* Only emitted for IS_STACKALIGN re-alignment. */
28273 rtx dest, src;
28274 unsigned reg;
28276 src = SET_SRC (pat);
28277 dest = SET_DEST (pat);
28279 gcc_assert (src == stack_pointer_rtx);
28280 reg = REGNO (dest);
28281 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28282 reg + 0x90, reg);
28284 handled_one = true;
28285 break;
28287 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28288 to get correct dwarf information for shrink-wrapping. We should not
28289 emit unwind information for it because these notes are used either
28290 for pretend arguments or to adjust sp and restore registers from
28291 the stack. */
28292 case REG_CFA_ADJUST_CFA:
28293 case REG_CFA_RESTORE:
28294 return;
28296 case REG_CFA_DEF_CFA:
28297 case REG_CFA_EXPRESSION:
28298 case REG_CFA_OFFSET:
28299 /* ??? Only handling here what we actually emit. */
28300 gcc_unreachable ();
28302 default:
28303 break;
28306 if (handled_one)
28307 return;
28308 pat = PATTERN (insn);
28309 found:
28311 switch (GET_CODE (pat))
28313 case SET:
28314 arm_unwind_emit_set (asm_out_file, pat);
28315 break;
28317 case SEQUENCE:
28318 /* Store multiple. */
28319 arm_unwind_emit_sequence (asm_out_file, pat);
28320 break;
28322 default:
28323 abort();
28328 /* Output a reference from a function exception table to the type_info
28329 object X. The EABI specifies that the symbol should be relocated by
28330 an R_ARM_TARGET2 relocation. */
28332 static bool
28333 arm_output_ttype (rtx x)
28335 fputs ("\t.word\t", asm_out_file);
28336 output_addr_const (asm_out_file, x);
28337 /* Use special relocations for symbol references. */
28338 if (!CONST_INT_P (x))
28339 fputs ("(TARGET2)", asm_out_file);
28340 fputc ('\n', asm_out_file);
28342 return TRUE;
28345 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28347 static void
28348 arm_asm_emit_except_personality (rtx personality)
28350 fputs ("\t.personality\t", asm_out_file);
28351 output_addr_const (asm_out_file, personality);
28352 fputc ('\n', asm_out_file);
28355 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28357 static void
28358 arm_asm_init_sections (void)
28360 exception_section = get_unnamed_section (0, output_section_asm_op,
28361 "\t.handlerdata");
28363 #endif /* ARM_UNWIND_INFO */
28365 /* Output unwind directives for the start/end of a function. */
28367 void
28368 arm_output_fn_unwind (FILE * f, bool prologue)
28370 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28371 return;
28373 if (prologue)
28374 fputs ("\t.fnstart\n", f);
28375 else
28377 /* If this function will never be unwound, then mark it as such.
28378 The same condition is used in arm_unwind_emit to suppress
28379 the frame annotations. */
28380 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28381 && (TREE_NOTHROW (current_function_decl)
28382 || crtl->all_throwers_are_sibcalls))
28383 fputs("\t.cantunwind\n", f);
28385 fputs ("\t.fnend\n", f);
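/* Output the relocation decoration for a TLS UNSPEC operand,
   e.g. "(tlsgd)" or "(gottpoff)", followed by the PC-relative
   adjustment that the GD, LDM, IE and descriptor sequences need.  */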
28389 static bool
28390 arm_emit_tls_decoration (FILE *fp, rtx x)
28392 enum tls_reloc reloc;
28393 rtx val;
28395 val = XVECEXP (x, 0, 0);
28396 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28398 output_addr_const (fp, val);
28400 switch (reloc)
28402 case TLS_GD32:
28403 fputs ("(tlsgd)", fp);
28404 break;
28405 case TLS_LDM32:
28406 fputs ("(tlsldm)", fp);
28407 break;
28408 case TLS_LDO32:
28409 fputs ("(tlsldo)", fp);
28410 break;
28411 case TLS_IE32:
28412 fputs ("(gottpoff)", fp);
28413 break;
28414 case TLS_LE32:
28415 fputs ("(tpoff)", fp);
28416 break;
28417 case TLS_DESCSEQ:
28418 fputs ("(tlsdesc)", fp);
28419 break;
28420 default:
28421 gcc_unreachable ();
28424 switch (reloc)
28426 case TLS_GD32:
28427 case TLS_LDM32:
28428 case TLS_IE32:
28429 case TLS_DESCSEQ:
28430 fputs (" + (. - ", fp);
28431 output_addr_const (fp, XVECEXP (x, 0, 2));
28432 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28433 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28434 output_addr_const (fp, XVECEXP (x, 0, 3));
28435 fputc (')', fp);
28436 break;
28437 default:
28438 break;
28441 return TRUE;
28444 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28446 static void
28447 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28449 gcc_assert (size == 4);
28450 fputs ("\t.word\t", file);
28451 output_addr_const (file, x);
28452 fputs ("(tlsldo)", file);
28455 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28457 static bool
28458 arm_output_addr_const_extra (FILE *fp, rtx x)
28460 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28461 return arm_emit_tls_decoration (fp, x);
28462 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28464 char label[256];
28465 int labelno = INTVAL (XVECEXP (x, 0, 0));
28467 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28468 assemble_name_raw (fp, label);
28470 return TRUE;
28472 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28474 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28475 if (GOT_PCREL)
28476 fputs ("+.", fp);
28477 fputs ("-(", fp);
28478 output_addr_const (fp, XVECEXP (x, 0, 0));
28479 fputc (')', fp);
28480 return TRUE;
28482 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28484 output_addr_const (fp, XVECEXP (x, 0, 0));
28485 if (GOT_PCREL)
28486 fputs ("+.", fp);
28487 fputs ("-(", fp);
28488 output_addr_const (fp, XVECEXP (x, 0, 1));
28489 fputc (')', fp);
28490 return TRUE;
28492 else if (GET_CODE (x) == CONST_VECTOR)
28493 return arm_emit_vector_const (fp, x);
28495 return FALSE;
28498 /* Output assembly for a shift instruction.
28499 SET_FLAGS determines how the instruction modifies the condition codes.
28500 0 - Do not set condition codes.
28501 1 - Set condition codes.
28502 2 - Use smallest instruction. */
28503 const char *
28504 arm_output_shift(rtx * operands, int set_flags)
28506 char pattern[100];
28507 static const char flag_chars[3] = {'?', '.', '!'};
28508 const char *shift;
28509 HOST_WIDE_INT val;
28510 char c;
28512 c = flag_chars[set_flags];
28513 if (TARGET_UNIFIED_ASM)
28515 shift = shift_op(operands[3], &val);
28516 if (shift)
28518 if (val != -1)
28519 operands[2] = GEN_INT(val);
28520 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28522 else
28523 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28525 else
28526 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
28527 output_asm_insn (pattern, operands);
28528 return "";
28531 /* Output assembly for a WMMX immediate shift instruction. */
28532 const char *
28533 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28535 int shift = INTVAL (operands[2]);
28536 char templ[50];
28537 enum machine_mode opmode = GET_MODE (operands[0]);
28539 gcc_assert (shift >= 0);
28541 /* If the shift value exceeds the element size (> 63 for the D qualifier,
28542 > 31 for W, > 15 for H), clamp wror/wsra to shifts by 32 and zero the destination otherwise. */
28543 if (((opmode == V4HImode) && (shift > 15))
28544 || ((opmode == V2SImode) && (shift > 31))
28545 || ((opmode == DImode) && (shift > 63)))
28547 if (wror_or_wsra)
28549 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28550 output_asm_insn (templ, operands);
28551 if (opmode == DImode)
28553 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28554 output_asm_insn (templ, operands);
28557 else
28559 /* The destination register will contain all zeros. */
28560 sprintf (templ, "wzero\t%%0");
28561 output_asm_insn (templ, operands);
28563 return "";
28566 if ((opmode == DImode) && (shift > 32))
28568 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28569 output_asm_insn (templ, operands);
28570 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28571 output_asm_insn (templ, operands);
28573 else
28575 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28576 output_asm_insn (templ, operands);
28578 return "";
28581 /* Output assembly for a WMMX tinsr instruction. */
28582 const char *
28583 arm_output_iwmmxt_tinsr (rtx *operands)
28585 int mask = INTVAL (operands[3]);
28586 int i;
28587 char templ[50];
28588 int units = mode_nunits[GET_MODE (operands[0])];
28589 gcc_assert ((mask & (mask - 1)) == 0);
28590 for (i = 0; i < units; ++i)
28592 if ((mask & 0x01) == 1)
28594 break;
28596 mask >>= 1;
28598 gcc_assert (i < units);
28600 switch (GET_MODE (operands[0]))
28602 case V8QImode:
28603 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28604 break;
28605 case V4HImode:
28606 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28607 break;
28608 case V2SImode:
28609 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28610 break;
28611 default:
28612 gcc_unreachable ();
28613 break;
28615 output_asm_insn (templ, operands);
28617 return "";
28620 /* Output a Thumb-1 casesi dispatch sequence. */
28621 const char *
28622 thumb1_output_casesi (rtx *operands)
28624 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
28626 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28628 switch (GET_MODE(diff_vec))
28630 case QImode:
28631 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28632 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28633 case HImode:
28634 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28635 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28636 case SImode:
28637 return "bl\t%___gnu_thumb1_case_si";
28638 default:
28639 gcc_unreachable ();
28643 /* Output a Thumb-2 casesi instruction. */
28644 const char *
28645 thumb2_output_casesi (rtx *operands)
28647 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
28649 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28651 output_asm_insn ("cmp\t%0, %1", operands);
28652 output_asm_insn ("bhi\t%l3", operands);
28653 switch (GET_MODE(diff_vec))
28655 case QImode:
28656 return "tbb\t[%|pc, %0]";
28657 case HImode:
28658 return "tbh\t[%|pc, %0, lsl #1]";
28659 case SImode:
28660 if (flag_pic)
28662 output_asm_insn ("adr\t%4, %l2", operands);
28663 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28664 output_asm_insn ("add\t%4, %4, %5", operands);
28665 return "bx\t%4";
28667 else
28669 output_asm_insn ("adr\t%4, %l2", operands);
28670 return "ldr\t%|pc, [%4, %0, lsl #2]";
28672 default:
28673 gcc_unreachable ();
28677 /* Most ARM cores are single issue, but some newer ones can dual issue.
28678 The scheduler descriptions rely on this being correct. */
28679 static int
28680 arm_issue_rate (void)
28682 switch (arm_tune)
28684 case cortexa15:
28685 return 3;
28687 case cortexr4:
28688 case cortexr4f:
28689 case cortexr5:
28690 case genericv7a:
28691 case cortexa5:
28692 case cortexa7:
28693 case cortexa8:
28694 case cortexa9:
28695 case cortexa53:
28696 case fa726te:
28697 case marvell_pj4:
28698 return 2;
28700 default:
28701 return 1;
28705 /* A table and a function to perform ARM-specific name mangling for
28706 NEON vector types in order to conform to the AAPCS (see "Procedure
28707 Call Standard for the ARM Architecture", Appendix A). To qualify
28708 for emission with the mangled names defined in that document, a
28709 vector type must not only be of the correct mode but also be
28710 composed of NEON vector element types (e.g. __builtin_neon_qi). */
28711 typedef struct
28713 enum machine_mode mode;
28714 const char *element_type_name;
28715 const char *aapcs_name;
28716 } arm_mangle_map_entry;
28718 static arm_mangle_map_entry arm_mangle_map[] = {
28719 /* 64-bit containerized types. */
28720 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
28721 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
28722 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
28723 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
28724 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
28725 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
28726 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
28727 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
28728 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
28729 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
28730 /* 128-bit containerized types. */
28731 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
28732 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
28733 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
28734 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
28735 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
28736 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
28737 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
28738 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
28739 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
28740 { VOIDmode, NULL, NULL }
28743 const char *
28744 arm_mangle_type (const_tree type)
28746 arm_mangle_map_entry *pos = arm_mangle_map;
28748 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28749 has to be mangled as if it were in the "std" namespace. */
28750 if (TARGET_AAPCS_BASED
28751 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28752 return "St9__va_list";
28754 /* Half-precision float. */
28755 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28756 return "Dh";
28758 if (TREE_CODE (type) != VECTOR_TYPE)
28759 return NULL;
28761 /* Check the mode of the vector type, and the name of the vector
28762 element type, against the table. */
28763 while (pos->mode != VOIDmode)
28765 tree elt_type = TREE_TYPE (type);
28767 if (pos->mode == TYPE_MODE (type)
28768 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
28769 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
28770 pos->element_type_name))
28771 return pos->aapcs_name;
28773 pos++;
28776 /* Use the default mangling for unrecognized (possibly user-defined)
28777 vector types. */
28778 return NULL;
28781 /* Order of allocation of core registers for Thumb: this allocation is
28782 written over the corresponding initial entries of the array
28783 initialized with REG_ALLOC_ORDER. We allocate all low registers
28784 first. Saving and restoring a low register is usually cheaper than
28785 using a call-clobbered high register. */
28787 static const int thumb_core_reg_alloc_order[] =
28789 3, 2, 1, 0, 4, 5, 6, 7,
28790 14, 12, 8, 9, 10, 11
28793 /* Adjust register allocation order when compiling for Thumb. */
28795 void
28796 arm_order_regs_for_local_alloc (void)
28798 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28799 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28800 if (TARGET_THUMB)
28801 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28802 sizeof (thumb_core_reg_alloc_order));
28805 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28807 bool
28808 arm_frame_pointer_required (void)
28810 return (cfun->has_nonlocal_label
28811 || SUBTARGET_FRAME_POINTER_REQUIRED
28812 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
28815 /* Only Thumb-1 lacks conditional execution, so return true if the
28816 target is not Thumb-1. */
28817 static bool
28818 arm_have_conditional_execution (void)
28820 return !TARGET_THUMB1;
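/* Map the scalar rounding built-ins (floorf, ceilf, truncf, roundf)
   onto the ARMv8 NEON vrintm/vrintp/vrintz/vrinta builtins when
   vectorizing, provided -funsafe-math-optimizations is in effect.  */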
28823 tree
28824 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
28826 enum machine_mode in_mode, out_mode;
28827 int in_n, out_n;
28829 if (TREE_CODE (type_out) != VECTOR_TYPE
28830 || TREE_CODE (type_in) != VECTOR_TYPE
28831 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
28832 return NULL_TREE;
28834 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28835 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28836 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28837 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28839 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
28840 decl of the vectorized builtin for the appropriate vector mode.
28841 NULL_TREE is returned if no such builtin is available. */
28842 #undef ARM_CHECK_BUILTIN_MODE
28843 #define ARM_CHECK_BUILTIN_MODE(C) \
28844 (out_mode == SFmode && out_n == C \
28845 && in_mode == SFmode && in_n == C)
28847 #undef ARM_FIND_VRINT_VARIANT
28848 #define ARM_FIND_VRINT_VARIANT(N) \
28849 (ARM_CHECK_BUILTIN_MODE (2) \
28850 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
28851 : (ARM_CHECK_BUILTIN_MODE (4) \
28852 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
28853 : NULL_TREE))
28855 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
28857 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28858 switch (fn)
28860 case BUILT_IN_FLOORF:
28861 return ARM_FIND_VRINT_VARIANT (vrintm);
28862 case BUILT_IN_CEILF:
28863 return ARM_FIND_VRINT_VARIANT (vrintp);
28864 case BUILT_IN_TRUNCF:
28865 return ARM_FIND_VRINT_VARIANT (vrintz);
28866 case BUILT_IN_ROUNDF:
28867 return ARM_FIND_VRINT_VARIANT (vrinta);
28868 default:
28869 return NULL_TREE;
28872 return NULL_TREE;
28874 #undef ARM_CHECK_BUILTIN_MODE
28875 #undef ARM_FIND_VRINT_VARIANT
28877 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28878 static HOST_WIDE_INT
28879 arm_vector_alignment (const_tree type)
28881 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28883 if (TARGET_AAPCS_BASED)
28884 align = MIN (align, 64);
28886 return align;
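/* [Editor's sketch, not part of arm.c] How the AAPCS cap behaves for common
   NEON vector sizes: the hook takes the natural (size-based) alignment in
   bits, from TYPE_SIZE, and clamps it to 64 when TARGET_AAPCS_BASED. */
#include <stdio.h>

static long aapcs_vector_align (long size_in_bits)
{
  long align = size_in_bits;          /* natural alignment = vector size */
  if (align > 64)                     /* AAPCS maximum for vectors */
    align = 64;
  return align;
}

int main (void)
{
  printf ("64-bit vector  -> %ld-bit alignment\n", aapcs_vector_align (64));   /* 64 */
  printf ("128-bit vector -> %ld-bit alignment\n", aapcs_vector_align (128));  /* 64 */
  return 0;
}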
28889 static unsigned int
28890 arm_autovectorize_vector_sizes (void)
28892 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28895 static bool
28896 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28898 /* Vectors which aren't in packed structures will not be less aligned than
28899 the natural alignment of their element type, so this is safe. */
28900 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
28901 return !is_packed;
28903 return default_builtin_vector_alignment_reachable (type, is_packed);
28906 static bool
28907 arm_builtin_support_vector_misalignment (enum machine_mode mode,
28908 const_tree type, int misalignment,
28909 bool is_packed)
28911 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
28913 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28915 if (is_packed)
28916 return align == 1;
28918 /* If the misalignment is unknown, we should be able to handle the access
28919 so long as it is not to a member of a packed data structure. */
28920 if (misalignment == -1)
28921 return true;
28923 /* Return true if the misalignment is a multiple of the natural alignment
28924 of the vector's element type. This is probably always going to be
28925 true in practice, since we've already established that this isn't a
28926 packed access. */
28927 return ((misalignment % align) == 0);
28930 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28931 is_packed);
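/* [Editor's sketch, not part of arm.c] The little-endian NEON misalignment
   test reduced to plain integers: ALIGN is the element alignment in bytes,
   MISALIGNMENT the known byte offset (or -1 for unknown).  Packed accesses
   are only OK when the element alignment is a single byte. */
static int neon_misalignment_ok (int misalignment, int align, int is_packed)
{
  if (is_packed)
    return align == 1;
  if (misalignment == -1)             /* unknown but not packed: handleable */
    return 1;
  return (misalignment % align) == 0;
}

/* e.g. neon_misalignment_ok (4, 4, 0) == 1, neon_misalignment_ok (2, 4, 0) == 0. */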
28934 static void
28935 arm_conditional_register_usage (void)
28937 int regno;
28939 if (TARGET_THUMB1 && optimize_size)
28941 /* When optimizing for size on Thumb-1, it's better not
28942 to use the HI regs, because of the overhead of
28943 stacking them. */
28944 for (regno = FIRST_HI_REGNUM;
28945 regno <= LAST_HI_REGNUM; ++regno)
28946 fixed_regs[regno] = call_used_regs[regno] = 1;
28949 /* The link register can be clobbered by any branch insn,
28950 but we have no way to track that at present, so mark
28951 it as unavailable. */
28952 if (TARGET_THUMB1)
28953 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28955 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
28957 /* VFPv3 registers are disabled when earlier VFP
28958 versions are selected due to the definition of
28959 LAST_VFP_REGNUM. */
28960 for (regno = FIRST_VFP_REGNUM;
28961 regno <= LAST_VFP_REGNUM; ++ regno)
28963 fixed_regs[regno] = 0;
28964 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28965 || regno >= FIRST_VFP_REGNUM + 32;
28969 if (TARGET_REALLY_IWMMXT)
28971 regno = FIRST_IWMMXT_GR_REGNUM;
28972 /* The 2002/10/09 revision of the XScale ABI has wCG0
28973 and wCG1 as call-preserved registers. The 2002/11/21
28974 revision changed this so that all wCG registers are
28975 scratch registers. */
28976 for (regno = FIRST_IWMMXT_GR_REGNUM;
28977 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28978 fixed_regs[regno] = 0;
28979 /* The XScale ABI has wR0 - wR9 as scratch registers,
28980 the rest as call-preserved registers. */
28981 for (regno = FIRST_IWMMXT_REGNUM;
28982 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28984 fixed_regs[regno] = 0;
28985 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28989 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28991 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28992 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28994 else if (TARGET_APCS_STACK)
28996 fixed_regs[10] = 1;
28997 call_used_regs[10] = 1;
28999 /* -mcaller-super-interworking reserves r11 for calls to
29000 _interwork_r11_call_via_rN(). Making the register global
29001 is an easy way of ensuring that it remains valid for all
29002 calls. */
29003 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29004 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29006 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29007 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29008 if (TARGET_CALLER_INTERWORKING)
29009 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29011 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29014 static reg_class_t
29015 arm_preferred_rename_class (reg_class_t rclass)
29017 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29018 using GENERAL_REGS. During the register rename pass we therefore
29019 prefer LO_REGS, so that code size can be reduced. */
29020 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29021 return LO_REGS;
29022 else
29023 return NO_REGS;
29026 /* Compute the attribute "length" of insn "*push_multi".
29027 So this function MUST be kept in sync with that insn pattern. */
29028 int
29029 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29031 int i, regno, hi_reg;
29032 int num_saves = XVECLEN (parallel_op, 0);
29034 /* ARM mode. */
29035 if (TARGET_ARM)
29036 return 4;
29037 /* Thumb1 mode. */
29038 if (TARGET_THUMB1)
29039 return 2;
29041 /* Thumb2 mode. */
29042 regno = REGNO (first_op);
29043 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29044 for (i = 1; i < num_saves && !hi_reg; i++)
29046 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29047 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29050 if (!hi_reg)
29051 return 2;
29052 return 4;
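/* [Editor's sketch, not part of arm.c] The Thumb-2 length rule in isolation:
   a PUSH of low registers (r0-r7), optionally plus LR, fits the 16-bit T1
   encoding; any other high register forces the 32-bit encoding.  REGNOS and
   COUNT stand in for the registers found in the PARALLEL. */
static int thumb2_push_length (const int *regnos, int count)
{
  int i;
  for (i = 0; i < count; i++)
    if (regnos[i] > 7 && regnos[i] != 14)   /* high reg other than LR */
      return 4;
  return 2;
}

/* { 0, 4, 7, 14 } -> 2 bytes; { 0, 4, 8 } -> 4 bytes. */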
29055 /* Compute the number of instructions emitted by output_move_double. */
29056 int
29057 arm_count_output_move_double_insns (rtx *operands)
29059 int count;
29060 rtx ops[2];
29061 /* output_move_double may modify the operands array, so call it
29062 here on a copy of the array. */
29063 ops[0] = operands[0];
29064 ops[1] = operands[1];
29065 output_move_double (ops, false, &count);
29066 return count;
29069 int
29070 vfp3_const_double_for_fract_bits (rtx operand)
29072 REAL_VALUE_TYPE r0;
29074 if (!CONST_DOUBLE_P (operand))
29075 return 0;
29077 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29078 if (exact_real_inverse (DFmode, &r0))
29080 if (exact_real_truncate (DFmode, &r0))
29082 HOST_WIDE_INT value = real_to_integer (&r0);
29083 value = value & 0xffffffff;
29084 if ((value != 0) && ( (value & (value - 1)) == 0))
29085 return int_log2 (value);
29088 return 0;
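/* [Editor's sketch, not part of arm.c] What "fract bits" means here: the
   routine accepts a constant of the form 1/2^n and returns n, which is what
   the VCVT fixed-point variants encode.  A plain-double model: */
#include <math.h>

static int fract_bits_of (double x)
{
  double inv = 1.0 / x;
  double n = log2 (inv);
  /* Accept only exact powers of two in a usable exponent range.  */
  if (inv <= 0 || n != floor (n) || n < 1 || n > 32)
    return 0;
  return (int) n;
}

/* fract_bits_of (0.25) == 2, fract_bits_of (1.0 / 65536) == 16,
   fract_bits_of (0.3) == 0 (no exact fixed-point form). */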
29091 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29093 static void
29094 arm_pre_atomic_barrier (enum memmodel model)
29096 if (need_atomic_barrier_p (model, true))
29097 emit_insn (gen_memory_barrier ());
29100 static void
29101 arm_post_atomic_barrier (enum memmodel model)
29103 if (need_atomic_barrier_p (model, false))
29104 emit_insn (gen_memory_barrier ());
29107 /* Emit the load-exclusive and store-exclusive instructions.
29108 Use acquire and release versions if necessary. */
29110 static void
29111 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29113 rtx (*gen) (rtx, rtx);
29115 if (acq)
29117 switch (mode)
29119 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29120 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29121 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29122 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29123 default:
29124 gcc_unreachable ();
29127 else
29129 switch (mode)
29131 case QImode: gen = gen_arm_load_exclusiveqi; break;
29132 case HImode: gen = gen_arm_load_exclusivehi; break;
29133 case SImode: gen = gen_arm_load_exclusivesi; break;
29134 case DImode: gen = gen_arm_load_exclusivedi; break;
29135 default:
29136 gcc_unreachable ();
29140 emit_insn (gen (rval, mem));
29143 static void
29144 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29145 rtx mem, bool rel)
29147 rtx (*gen) (rtx, rtx, rtx);
29149 if (rel)
29151 switch (mode)
29153 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29154 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29155 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29156 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29157 default:
29158 gcc_unreachable ();
29161 else
29163 switch (mode)
29165 case QImode: gen = gen_arm_store_exclusiveqi; break;
29166 case HImode: gen = gen_arm_store_exclusivehi; break;
29167 case SImode: gen = gen_arm_store_exclusivesi; break;
29168 case DImode: gen = gen_arm_store_exclusivedi; break;
29169 default:
29170 gcc_unreachable ();
29174 emit_insn (gen (bval, rval, mem));
29177 /* Emit INSN as a jump and mark it as unlikely to be taken. */
29179 static void
29180 emit_unlikely_jump (rtx insn)
29182 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29184 insn = emit_jump_insn (insn);
29185 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29188 /* Expand a compare and swap pattern. */
29190 void
29191 arm_expand_compare_and_swap (rtx operands[])
29193 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29194 enum machine_mode mode;
29195 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29197 bval = operands[0];
29198 rval = operands[1];
29199 mem = operands[2];
29200 oldval = operands[3];
29201 newval = operands[4];
29202 is_weak = operands[5];
29203 mod_s = operands[6];
29204 mod_f = operands[7];
29205 mode = GET_MODE (mem);
29207 /* Normally the succ memory model must be stronger than fail, but in the
29208 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29209 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29211 if (TARGET_HAVE_LDACQ
29212 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29213 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29214 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29216 switch (mode)
29218 case QImode:
29219 case HImode:
29220 /* For narrow modes, we're going to perform the comparison in SImode,
29221 so do the zero-extension now. */
29222 rval = gen_reg_rtx (SImode);
29223 oldval = convert_modes (SImode, mode, oldval, true);
29224 /* FALLTHRU */
29226 case SImode:
29227 /* Force the value into a register if needed. We waited until after
29228 the zero-extension above to do this properly. */
29229 if (!arm_add_operand (oldval, SImode))
29230 oldval = force_reg (SImode, oldval);
29231 break;
29233 case DImode:
29234 if (!cmpdi_operand (oldval, mode))
29235 oldval = force_reg (mode, oldval);
29236 break;
29238 default:
29239 gcc_unreachable ();
29242 switch (mode)
29244 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29245 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29246 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29247 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29248 default:
29249 gcc_unreachable ();
29252 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29254 if (mode == QImode || mode == HImode)
29255 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29257 /* In all cases, we arrange for success to be signaled by Z set.
29258 This arrangement allows for the boolean result to be used directly
29259 in a subsequent branch, post optimization. */
29260 x = gen_rtx_REG (CCmode, CC_REGNUM);
29261 x = gen_rtx_EQ (SImode, x, const0_rtx);
29262 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
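/* [Editor's sketch, not part of arm.c] The success/failure memory-model
   fix-up in isolation.  The enum values are local stand-ins, not GCC's
   memmodel enumeration; the point is only the rule: if the failure ordering
   is ACQUIRE while the success ordering is only RELEASE, strengthen success
   to ACQ_REL so the acquire half is not lost. */
enum model { RELAXED, CONSUME, ACQUIRE, RELEASE, ACQ_REL, SEQ_CST };

static enum model promote_success_model (enum model succ, enum model fail)
{
  if (fail == ACQUIRE && succ == RELEASE)
    return ACQ_REL;
  return succ;
}

/* promote_success_model (RELEASE, ACQUIRE) == ACQ_REL;
   promote_success_model (RELEASE, RELAXED) == RELEASE. */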
29265 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29266 another memory store between the load-exclusive and store-exclusive can
29267 reset the monitor from Exclusive to Open state. This means we must wait
29268 until after reload to split the pattern, lest we get a register spill in
29269 the middle of the atomic sequence. */
29271 void
29272 arm_split_compare_and_swap (rtx operands[])
29274 rtx rval, mem, oldval, newval, scratch;
29275 enum machine_mode mode;
29276 enum memmodel mod_s, mod_f;
29277 bool is_weak;
29278 rtx label1, label2, x, cond;
29280 rval = operands[0];
29281 mem = operands[1];
29282 oldval = operands[2];
29283 newval = operands[3];
29284 is_weak = (operands[4] != const0_rtx);
29285 mod_s = (enum memmodel) INTVAL (operands[5]);
29286 mod_f = (enum memmodel) INTVAL (operands[6]);
29287 scratch = operands[7];
29288 mode = GET_MODE (mem);
29290 bool use_acquire = TARGET_HAVE_LDACQ
29291 && !(mod_s == MEMMODEL_RELAXED
29292 || mod_s == MEMMODEL_CONSUME
29293 || mod_s == MEMMODEL_RELEASE);
29295 bool use_release = TARGET_HAVE_LDACQ
29296 && !(mod_s == MEMMODEL_RELAXED
29297 || mod_s == MEMMODEL_CONSUME
29298 || mod_s == MEMMODEL_ACQUIRE);
29300 /* Checks whether a barrier is needed and emits one accordingly. */
29301 if (!(use_acquire || use_release))
29302 arm_pre_atomic_barrier (mod_s);
29304 label1 = NULL_RTX;
29305 if (!is_weak)
29307 label1 = gen_label_rtx ();
29308 emit_label (label1);
29310 label2 = gen_label_rtx ();
29312 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29314 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29315 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29316 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29317 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29318 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29320 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29322 /* Weak or strong, we want EQ to be true for success, so that we
29323 match the flags that we got from the compare above. */
29324 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29325 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29326 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29328 if (!is_weak)
29330 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29331 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29332 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29333 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29336 if (mod_f != MEMMODEL_RELAXED)
29337 emit_label (label2);
29339 /* Checks whether a barrier is needed and emits one accordingly. */
29340 if (!(use_acquire || use_release))
29341 arm_post_atomic_barrier (mod_s);
29343 if (mod_f == MEMMODEL_RELAXED)
29344 emit_label (label2);
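/* [Editor's sketch, not part of arm.c] The control flow the split produces,
   written as plain, non-atomic C.  ldrex/strex are modelled by ordinary
   loads and stores plus a fake "monitor" flag, purely to show the loop
   structure: load-exclusive, compare, conditional store-exclusive, and (for
   a strong CAS) retry while the store-exclusive fails.  A weak CAS simply
   omits the retry loop. */
static int fake_monitor_ok = 1;              /* pretend exclusivity always holds */

static unsigned model_ldrex (unsigned *mem) { return *mem; }

static int model_strex (unsigned *mem, unsigned val)
{
  if (!fake_monitor_ok)
    return 1;                                /* non-zero = store failed */
  *mem = val;
  return 0;
}

static int model_strong_cas (unsigned *mem, unsigned *expected, unsigned desired)
{
  unsigned observed;
  do
    {
      observed = model_ldrex (mem);          /* label1: load-exclusive */
      if (observed != *expected)
        {
          *expected = observed;              /* label2: fail path */
          return 0;
        }
    }
  while (model_strex (mem, desired));        /* retry on store failure */
  return 1;
}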
29347 void
29348 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29349 rtx value, rtx model_rtx, rtx cond)
29351 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
29352 enum machine_mode mode = GET_MODE (mem);
29353 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
29354 rtx label, x;
29356 bool use_acquire = TARGET_HAVE_LDACQ
29357 && !(model == MEMMODEL_RELAXED
29358 || model == MEMMODEL_CONSUME
29359 || model == MEMMODEL_RELEASE);
29361 bool use_release = TARGET_HAVE_LDACQ
29362 && !(model == MEMMODEL_RELAXED
29363 || model == MEMMODEL_CONSUME
29364 || model == MEMMODEL_ACQUIRE);
29366 /* Checks whether a barrier is needed and emits one accordingly. */
29367 if (!(use_acquire || use_release))
29368 arm_pre_atomic_barrier (model);
29370 label = gen_label_rtx ();
29371 emit_label (label);
29373 if (new_out)
29374 new_out = gen_lowpart (wmode, new_out);
29375 if (old_out)
29376 old_out = gen_lowpart (wmode, old_out);
29377 else
29378 old_out = new_out;
29379 value = simplify_gen_subreg (wmode, value, mode, 0);
29381 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29383 switch (code)
29385 case SET:
29386 new_out = value;
29387 break;
29389 case NOT:
29390 x = gen_rtx_AND (wmode, old_out, value);
29391 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29392 x = gen_rtx_NOT (wmode, new_out);
29393 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29394 break;
29396 case MINUS:
29397 if (CONST_INT_P (value))
29399 value = GEN_INT (-INTVAL (value));
29400 code = PLUS;
29402 /* FALLTHRU */
29404 case PLUS:
29405 if (mode == DImode)
29407 /* DImode plus/minus need to clobber flags. */
29408 /* The adddi3 and subdi3 patterns are incorrectly written so that
29409 they require matching operands, even when we could easily support
29410 three operands. Thankfully, this can be fixed up post-splitting,
29411 as the individual add+adc patterns do accept three operands and
29412 post-reload cprop can make these moves go away. */
29413 emit_move_insn (new_out, old_out);
29414 if (code == PLUS)
29415 x = gen_adddi3 (new_out, new_out, value);
29416 else
29417 x = gen_subdi3 (new_out, new_out, value);
29418 emit_insn (x);
29419 break;
29421 /* FALLTHRU */
29423 default:
29424 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29425 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29426 break;
29429 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29430 use_release);
29432 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29433 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29435 /* Checks whether a barrier is needed and emits one accordingly. */
29436 if (!(use_acquire || use_release))
29437 arm_post_atomic_barrier (model);
29440 #define MAX_VECT_LEN 16
29442 struct expand_vec_perm_d
29444 rtx target, op0, op1;
29445 unsigned char perm[MAX_VECT_LEN];
29446 enum machine_mode vmode;
29447 unsigned char nelt;
29448 bool one_vector_p;
29449 bool testing_p;
29452 /* Generate a variable permutation. */
29454 static void
29455 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29457 enum machine_mode vmode = GET_MODE (target);
29458 bool one_vector_p = rtx_equal_p (op0, op1);
29460 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29461 gcc_checking_assert (GET_MODE (op0) == vmode);
29462 gcc_checking_assert (GET_MODE (op1) == vmode);
29463 gcc_checking_assert (GET_MODE (sel) == vmode);
29464 gcc_checking_assert (TARGET_NEON);
29466 if (one_vector_p)
29468 if (vmode == V8QImode)
29469 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29470 else
29471 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29473 else
29475 rtx pair;
29477 if (vmode == V8QImode)
29479 pair = gen_reg_rtx (V16QImode);
29480 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29481 pair = gen_lowpart (TImode, pair);
29482 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29484 else
29486 pair = gen_reg_rtx (OImode);
29487 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29488 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29493 void
29494 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29496 enum machine_mode vmode = GET_MODE (target);
29497 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
29498 bool one_vector_p = rtx_equal_p (op0, op1);
29499 rtx rmask[MAX_VECT_LEN], mask;
29501 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29502 numbering of elements for big-endian, we must reverse the order. */
29503 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29505 /* The VTBL instruction does not use a modulo index, so we must take care
29506 of that ourselves. */
29507 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29508 for (i = 0; i < nelt; ++i)
29509 rmask[i] = mask;
29510 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
29511 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29513 arm_expand_vec_perm_1 (target, op0, op1, sel);
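/* [Editor's sketch, not part of arm.c] Why the selector is ANDed with a mask
   before VTBL: VEC_PERM_EXPR indices are defined modulo the number of input
   elements, but VTBL writes 0 for out-of-range indices, so the indices must
   be reduced explicitly.  NELT is the element count of one vector; with two
   distinct inputs the lookup table is twice as wide. */
static void mask_perm_indices (unsigned char *sel, int nelt, int one_vector_p)
{
  int i;
  unsigned char mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    sel[i] &= mask;
}

/* With nelt == 8 and two inputs, an index of 17 becomes 17 & 15 == 1. */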
29516 /* Generate or test for an insn that supports a constant permutation. */
29518 /* Recognize patterns for the VUZP insns. */
29520 static bool
29521 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29523 unsigned int i, odd, mask, nelt = d->nelt;
29524 rtx out0, out1, in0, in1, x;
29525 rtx (*gen)(rtx, rtx, rtx, rtx);
29527 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29528 return false;
29530 /* Note that these are little-endian tests. Adjust for big-endian later. */
29531 if (d->perm[0] == 0)
29532 odd = 0;
29533 else if (d->perm[0] == 1)
29534 odd = 1;
29535 else
29536 return false;
29537 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29539 for (i = 0; i < nelt; i++)
29541 unsigned elt = (i * 2 + odd) & mask;
29542 if (d->perm[i] != elt)
29543 return false;
29546 /* Success! */
29547 if (d->testing_p)
29548 return true;
29550 switch (d->vmode)
29552 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29553 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29554 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29555 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29556 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29557 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29558 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29559 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29560 default:
29561 gcc_unreachable ();
29564 in0 = d->op0;
29565 in1 = d->op1;
29566 if (BYTES_BIG_ENDIAN)
29568 x = in0, in0 = in1, in1 = x;
29569 odd = !odd;
29572 out0 = d->target;
29573 out1 = gen_reg_rtx (d->vmode);
29574 if (odd)
29575 x = out0, out0 = out1, out1 = x;
29577 emit_insn (gen (out0, in0, in1, out1));
29578 return true;
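/* [Editor's sketch, not part of arm.c] The index pattern that
   arm_evpc_neon_vuzp recognises: element i of the result is element
   2*i+odd of the (possibly doubled) input, i.e. VUZP de-interleaves the
   even or the odd lanes. */
static void vuzp_expected_perm (unsigned char *perm, int nelt,
                                int odd, int one_vector_p)
{
  int i;
  unsigned mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (i = 0; i < nelt; i++)
    perm[i] = (2 * i + odd) & mask;
}

/* nelt == 4, odd == 0, two inputs: { 0, 2, 4, 6 };
   nelt == 4, odd == 1, one input : { 1, 3, 1, 3 }. */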
29581 /* Recognize patterns for the VZIP insns. */
29583 static bool
29584 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29586 unsigned int i, high, mask, nelt = d->nelt;
29587 rtx out0, out1, in0, in1, x;
29588 rtx (*gen)(rtx, rtx, rtx, rtx);
29590 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29591 return false;
29593 /* Note that these are little-endian tests. Adjust for big-endian later. */
29594 high = nelt / 2;
29595 if (d->perm[0] == high)
29596 ;
29597 else if (d->perm[0] == 0)
29598 high = 0;
29599 else
29600 return false;
29601 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29603 for (i = 0; i < nelt / 2; i++)
29605 unsigned elt = (i + high) & mask;
29606 if (d->perm[i * 2] != elt)
29607 return false;
29608 elt = (elt + nelt) & mask;
29609 if (d->perm[i * 2 + 1] != elt)
29610 return false;
29613 /* Success! */
29614 if (d->testing_p)
29615 return true;
29617 switch (d->vmode)
29619 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29620 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29621 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29622 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29623 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
29624 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
29625 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29626 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29627 default:
29628 gcc_unreachable ();
29631 in0 = d->op0;
29632 in1 = d->op1;
29633 if (BYTES_BIG_ENDIAN)
29635 x = in0, in0 = in1, in1 = x;
29636 high = !high;
29639 out0 = d->target;
29640 out1 = gen_reg_rtx (d->vmode);
29641 if (high)
29642 x = out0, out0 = out1, out1 = x;
29644 emit_insn (gen (out0, in0, in1, out1));
29645 return true;
29648 /* Recognize patterns for the VREV insns. */
29650 static bool
29651 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29653 unsigned int i, j, diff, nelt = d->nelt;
29654 rtx (*gen)(rtx, rtx, rtx);
29656 if (!d->one_vector_p)
29657 return false;
29659 diff = d->perm[0];
29660 switch (diff)
29662 case 7:
29663 switch (d->vmode)
29665 case V16QImode: gen = gen_neon_vrev64v16qi; break;
29666 case V8QImode: gen = gen_neon_vrev64v8qi; break;
29667 default:
29668 return false;
29670 break;
29671 case 3:
29672 switch (d->vmode)
29674 case V16QImode: gen = gen_neon_vrev32v16qi; break;
29675 case V8QImode: gen = gen_neon_vrev32v8qi; break;
29676 case V8HImode: gen = gen_neon_vrev64v8hi; break;
29677 case V4HImode: gen = gen_neon_vrev64v4hi; break;
29678 default:
29679 return false;
29681 break;
29682 case 1:
29683 switch (d->vmode)
29685 case V16QImode: gen = gen_neon_vrev16v16qi; break;
29686 case V8QImode: gen = gen_neon_vrev16v8qi; break;
29687 case V8HImode: gen = gen_neon_vrev32v8hi; break;
29688 case V4HImode: gen = gen_neon_vrev32v4hi; break;
29689 case V4SImode: gen = gen_neon_vrev64v4si; break;
29690 case V2SImode: gen = gen_neon_vrev64v2si; break;
29691 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
29692 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
29693 default:
29694 return false;
29696 break;
29697 default:
29698 return false;
29701 for (i = 0; i < nelt ; i += diff + 1)
29702 for (j = 0; j <= diff; j += 1)
29704 /* This is guaranteed to be true, as the value of diff
29705 is 7, 3 or 1, and we should have enough elements in the
29706 queue to generate this. Getting a vector mask with a
29707 value of diff other than these implies that something
29708 is wrong by the time we get here. */
29709 gcc_assert (i + j < nelt);
29710 if (d->perm[i + j] != i + diff - j)
29711 return false;
29714 /* Success! */
29715 if (d->testing_p)
29716 return true;
29718 /* ??? The third operand is an artifact of the builtin infrastructure
29719 and is ignored by the actual instruction. */
29720 emit_insn (gen (d->target, d->op0, const0_rtx));
29721 return true;
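/* [Editor's sketch, not part of arm.c] The index pattern accepted for VREV:
   the permutation reverses elements within groups of DIFF+1 lanes, where
   DIFF is 7, 3 or 1; which VREV variant (VREV64/VREV32/VREV16) is chosen
   then depends on the element width.  perm[i + j] must equal i + diff - j
   within each group. */
static int is_vrev_perm (const unsigned char *perm, int nelt, int diff)
{
  int i, j;
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j++)
      if (perm[i + j] != i + diff - j)
        return 0;
  return 1;
}

/* { 1, 0, 3, 2 } with diff == 1 is a VREV pattern;
   { 3, 2, 1, 0 } with diff == 3 is too. */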
29724 /* Recognize patterns for the VTRN insns. */
29726 static bool
29727 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29729 unsigned int i, odd, mask, nelt = d->nelt;
29730 rtx out0, out1, in0, in1, x;
29731 rtx (*gen)(rtx, rtx, rtx, rtx);
29733 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29734 return false;
29736 /* Note that these are little-endian tests. Adjust for big-endian later. */
29737 if (d->perm[0] == 0)
29738 odd = 0;
29739 else if (d->perm[0] == 1)
29740 odd = 1;
29741 else
29742 return false;
29743 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29745 for (i = 0; i < nelt; i += 2)
29747 if (d->perm[i] != i + odd)
29748 return false;
29749 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29750 return false;
29753 /* Success! */
29754 if (d->testing_p)
29755 return true;
29757 switch (d->vmode)
29759 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29760 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29761 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29762 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29763 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29764 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29765 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29766 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29767 default:
29768 gcc_unreachable ();
29771 in0 = d->op0;
29772 in1 = d->op1;
29773 if (BYTES_BIG_ENDIAN)
29775 x = in0, in0 = in1, in1 = x;
29776 odd = !odd;
29779 out0 = d->target;
29780 out1 = gen_reg_rtx (d->vmode);
29781 if (odd)
29782 x = out0, out0 = out1, out1 = x;
29784 emit_insn (gen (out0, in0, in1, out1));
29785 return true;
29788 /* Recognize patterns for the VEXT insns. */
29790 static bool
29791 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29793 unsigned int i, nelt = d->nelt;
29794 rtx (*gen) (rtx, rtx, rtx, rtx);
29795 rtx offset;
29797 unsigned int location;
29799 unsigned int next = d->perm[0] + 1;
29801 /* TODO: Handle GCC's numbering of elements for big-endian. */
29802 if (BYTES_BIG_ENDIAN)
29803 return false;
29805 /* Check if the extracted indexes are increasing by one. */
29806 for (i = 1; i < nelt; next++, i++)
29808 /* If we hit the most significant element of the 2nd vector in
29809 the previous iteration, no need to test further. */
29810 if (next == 2 * nelt)
29811 return false;
29813 /* If we are operating on only one vector: it could be a
29814 rotation. If there are only two elements of size < 64, let
29815 arm_evpc_neon_vrev catch it. */
29816 if (d->one_vector_p && (next == nelt))
29818 if ((nelt == 2) && (d->vmode != V2DImode))
29819 return false;
29820 else
29821 next = 0;
29824 if (d->perm[i] != next)
29825 return false;
29828 location = d->perm[0];
29830 switch (d->vmode)
29832 case V16QImode: gen = gen_neon_vextv16qi; break;
29833 case V8QImode: gen = gen_neon_vextv8qi; break;
29834 case V4HImode: gen = gen_neon_vextv4hi; break;
29835 case V8HImode: gen = gen_neon_vextv8hi; break;
29836 case V2SImode: gen = gen_neon_vextv2si; break;
29837 case V4SImode: gen = gen_neon_vextv4si; break;
29838 case V2SFmode: gen = gen_neon_vextv2sf; break;
29839 case V4SFmode: gen = gen_neon_vextv4sf; break;
29840 case V2DImode: gen = gen_neon_vextv2di; break;
29841 default:
29842 return false;
29845 /* Success! */
29846 if (d->testing_p)
29847 return true;
29849 offset = GEN_INT (location);
29850 emit_insn (gen (d->target, d->op0, d->op1, offset));
29851 return true;
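/* [Editor's sketch, not part of arm.c] What arm_evpc_neon_vext accepts:
   indices that increase by one starting from some LOCATION, i.e. the result
   is a window of NELT consecutive elements taken from the concatenation of
   the two inputs, which is exactly VEXT's extract-with-offset behaviour.
   The single-operand rotation special case is left out of this sketch. */
static int is_vext_perm (const unsigned char *perm, int nelt)
{
  int i;
  for (i = 1; i < nelt; i++)
    {
      unsigned next = perm[i - 1] + 1;
      if (next >= (unsigned) (2 * nelt) || perm[i] != next)
        return 0;
    }
  return 1;
}

/* nelt == 4: { 2, 3, 4, 5 } is VEXT with offset 2; { 2, 3, 5, 6 } is not. */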
29854 /* The NEON VTBL instruction is a fully variable permutation that's even
29855 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29856 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29857 can do slightly better by expanding this as a constant where we don't
29858 have to apply a mask. */
29860 static bool
29861 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29863 rtx rperm[MAX_VECT_LEN], sel;
29864 enum machine_mode vmode = d->vmode;
29865 unsigned int i, nelt = d->nelt;
29867 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29868 numbering of elements for big-endian, we must reverse the order. */
29869 if (BYTES_BIG_ENDIAN)
29870 return false;
29872 if (d->testing_p)
29873 return true;
29875 /* Generic code will try constant permutation twice. Once with the
29876 original mode and again with the elements lowered to QImode.
29877 So wait and don't do the selector expansion ourselves. */
29878 if (vmode != V8QImode && vmode != V16QImode)
29879 return false;
29881 for (i = 0; i < nelt; ++i)
29882 rperm[i] = GEN_INT (d->perm[i]);
29883 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29884 sel = force_reg (vmode, sel);
29886 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29887 return true;
29890 static bool
29891 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29893 /* Check if the input mask matches vext before reordering the
29894 operands. */
29895 if (TARGET_NEON)
29896 if (arm_evpc_neon_vext (d))
29897 return true;
29899 /* The pattern matching functions above are written to look for a small
29900 number to begin the sequence (0, 1, N/2). If we begin with an index
29901 from the second operand, we can swap the operands. */
29902 if (d->perm[0] >= d->nelt)
29904 unsigned i, nelt = d->nelt;
29905 rtx x;
29907 for (i = 0; i < nelt; ++i)
29908 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29910 x = d->op0;
29911 d->op0 = d->op1;
29912 d->op1 = x;
29915 if (TARGET_NEON)
29917 if (arm_evpc_neon_vuzp (d))
29918 return true;
29919 if (arm_evpc_neon_vzip (d))
29920 return true;
29921 if (arm_evpc_neon_vrev (d))
29922 return true;
29923 if (arm_evpc_neon_vtrn (d))
29924 return true;
29925 return arm_evpc_neon_vtbl (d);
29927 return false;
29930 /* Expand a vec_perm_const pattern. */
29932 bool
29933 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29935 struct expand_vec_perm_d d;
29936 int i, nelt, which;
29938 d.target = target;
29939 d.op0 = op0;
29940 d.op1 = op1;
29942 d.vmode = GET_MODE (target);
29943 gcc_assert (VECTOR_MODE_P (d.vmode));
29944 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29945 d.testing_p = false;
29947 for (i = which = 0; i < nelt; ++i)
29949 rtx e = XVECEXP (sel, 0, i);
29950 int ei = INTVAL (e) & (2 * nelt - 1);
29951 which |= (ei < nelt ? 1 : 2);
29952 d.perm[i] = ei;
29955 switch (which)
29957 default:
29958 gcc_unreachable();
29960 case 3:
29961 d.one_vector_p = false;
29962 if (!rtx_equal_p (op0, op1))
29963 break;
29965 /* The elements of PERM do not suggest that only the first operand
29966 is used, but both operands are identical. Allow easier matching
29967 of the permutation by folding the permutation into the single
29968 input vector. */
29969 /* FALLTHRU */
29970 case 2:
29971 for (i = 0; i < nelt; ++i)
29972 d.perm[i] &= nelt - 1;
29973 d.op0 = op1;
29974 d.one_vector_p = true;
29975 break;
29977 case 1:
29978 d.op1 = op0;
29979 d.one_vector_p = true;
29980 break;
29983 return arm_expand_vec_perm_const_1 (&d);
29986 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29988 static bool
29989 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
29990 const unsigned char *sel)
29992 struct expand_vec_perm_d d;
29993 unsigned int i, nelt, which;
29994 bool ret;
29996 d.vmode = vmode;
29997 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29998 d.testing_p = true;
29999 memcpy (d.perm, sel, nelt);
30001 /* Categorize the set of elements in the selector. */
30002 for (i = which = 0; i < nelt; ++i)
30004 unsigned char e = d.perm[i];
30005 gcc_assert (e < 2 * nelt);
30006 which |= (e < nelt ? 1 : 2);
30009 /* If all elements come from the second vector, fold them onto the first. */
30010 if (which == 2)
30011 for (i = 0; i < nelt; ++i)
30012 d.perm[i] -= nelt;
30014 /* Check whether the mask can be applied to the vector type. */
30015 d.one_vector_p = (which != 3);
30017 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30018 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30019 if (!d.one_vector_p)
30020 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30022 start_sequence ();
30023 ret = arm_expand_vec_perm_const_1 (&d);
30024 end_sequence ();
30026 return ret;
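/* [Editor's sketch, not part of arm.c] The "which" bookkeeping shared by both
   entry points: bit 0 is set if any selector index refers to the first
   operand, bit 1 if any refers to the second.  A value of 2 (second operand
   only) is folded onto the first operand, here by subtracting NELT as the
   checking entry point does; 1 or 2 mark the permutation as one-vector. */
static int classify_selector (unsigned char *perm, int nelt)
{
  int i, which = 0;
  for (i = 0; i < nelt; i++)
    which |= (perm[i] < nelt ? 1 : 2);
  if (which == 2)
    for (i = 0; i < nelt; i++)
      perm[i] -= nelt;                 /* fold second-operand indices */
  return which;
}

/* nelt == 4: { 0, 1, 2, 3 } -> 1; { 4, 5, 6, 7 } -> 2 (folded to 0..3);
   { 0, 5, 2, 7 } -> 3. */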
30029 bool
30030 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30032 /* If we are soft float and either have LDRD or the access is at
30033 most word-sized, then all auto increment forms are ok. */
30034 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30035 return true;
30037 switch (code)
30039 /* Post-increment and pre-decrement are supported for all
30040 instruction forms except for vector forms. */
30041 case ARM_POST_INC:
30042 case ARM_PRE_DEC:
30043 if (VECTOR_MODE_P (mode))
30045 if (code != ARM_PRE_DEC)
30046 return true;
30047 else
30048 return false;
30051 return true;
30053 case ARM_POST_DEC:
30054 case ARM_PRE_INC:
30055 /* Without LDRD, when the mode size is greater than the
30056 word size there is no point in auto-incrementing,
30057 because ldm and stm will not have these forms. */
30058 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30059 return false;
30061 /* Vector and floating point modes do not support
30062 these auto increment forms. */
30063 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30064 return false;
30066 return true;
30068 default:
30069 return false;
30073 return false;
30076 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30077 on ARM, since we know that shifts by negative amounts are no-ops.
30078 Additionally, the default expansion code is not available or suitable
30079 for post-reload insn splits (this can occur when the register allocator
30080 chooses not to do a shift in NEON).
30082 This function is used in both initial expand and post-reload splits, and
30083 handles all kinds of 64-bit shifts.
30085 Input requirements:
30086 - It is safe for the input and output to be the same register, but
30087 early-clobber rules apply for the shift amount and scratch registers.
30088 - Shift by register requires both scratch registers. In all other cases
30089 the scratch registers may be NULL.
30090 - Ashiftrt by a register also clobbers the CC register. */
30091 void
30092 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30093 rtx amount, rtx scratch1, rtx scratch2)
30095 rtx out_high = gen_highpart (SImode, out);
30096 rtx out_low = gen_lowpart (SImode, out);
30097 rtx in_high = gen_highpart (SImode, in);
30098 rtx in_low = gen_lowpart (SImode, in);
30100 /* Terminology:
30101 in = the register pair containing the input value.
30102 out = the destination register pair.
30103 up = the high- or low-part of each pair.
30104 down = the opposite part to "up".
30105 In a shift, we can consider bits to shift from "up"-stream to
30106 "down"-stream, so in a left-shift "up" is the low-part and "down"
30107 is the high-part of each register pair. */
30109 rtx out_up = code == ASHIFT ? out_low : out_high;
30110 rtx out_down = code == ASHIFT ? out_high : out_low;
30111 rtx in_up = code == ASHIFT ? in_low : in_high;
30112 rtx in_down = code == ASHIFT ? in_high : in_low;
30114 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30115 gcc_assert (out
30116 && (REG_P (out) || GET_CODE (out) == SUBREG)
30117 && GET_MODE (out) == DImode);
30118 gcc_assert (in
30119 && (REG_P (in) || GET_CODE (in) == SUBREG)
30120 && GET_MODE (in) == DImode);
30121 gcc_assert (amount
30122 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30123 && GET_MODE (amount) == SImode)
30124 || CONST_INT_P (amount)));
30125 gcc_assert (scratch1 == NULL
30126 || (GET_CODE (scratch1) == SCRATCH)
30127 || (GET_MODE (scratch1) == SImode
30128 && REG_P (scratch1)));
30129 gcc_assert (scratch2 == NULL
30130 || (GET_CODE (scratch2) == SCRATCH)
30131 || (GET_MODE (scratch2) == SImode
30132 && REG_P (scratch2)));
30133 gcc_assert (!REG_P (out) || !REG_P (amount)
30134 || !HARD_REGISTER_P (out)
30135 || (REGNO (out) != REGNO (amount)
30136 && REGNO (out) + 1 != REGNO (amount)));
30138 /* Macros to make following code more readable. */
30139 #define SUB_32(DEST,SRC) \
30140 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30141 #define RSB_32(DEST,SRC) \
30142 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30143 #define SUB_S_32(DEST,SRC) \
30144 gen_addsi3_compare0 ((DEST), (SRC), \
30145 GEN_INT (-32))
30146 #define SET(DEST,SRC) \
30147 gen_rtx_SET (SImode, (DEST), (SRC))
30148 #define SHIFT(CODE,SRC,AMOUNT) \
30149 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30150 #define LSHIFT(CODE,SRC,AMOUNT) \
30151 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30152 SImode, (SRC), (AMOUNT))
30153 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30154 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30155 SImode, (SRC), (AMOUNT))
30156 #define ORR(A,B) \
30157 gen_rtx_IOR (SImode, (A), (B))
30158 #define BRANCH(COND,LABEL) \
30159 gen_arm_cond_branch ((LABEL), \
30160 gen_rtx_ ## COND (CCmode, cc_reg, \
30161 const0_rtx), \
30162 cc_reg)
30164 /* Shifts by register and shifts by constant are handled separately. */
30165 if (CONST_INT_P (amount))
30167 /* We have a shift-by-constant. */
30169 /* First, handle out-of-range shift amounts.
30170 In both cases we try to match the result that an ARM instruction in a
30171 shift-by-register would give. This helps reduce execution
30172 differences between optimization levels, but it won't stop other
30173 parts of the compiler doing different things. This is "undefined
30174 behaviour", in any case. */
30175 if (INTVAL (amount) <= 0)
30176 emit_insn (gen_movdi (out, in));
30177 else if (INTVAL (amount) >= 64)
30179 if (code == ASHIFTRT)
30181 rtx const31_rtx = GEN_INT (31);
30182 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30183 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30185 else
30186 emit_insn (gen_movdi (out, const0_rtx));
30189 /* Now handle valid shifts. */
30190 else if (INTVAL (amount) < 32)
30192 /* Shifts by a constant less than 32. */
30193 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30195 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30196 emit_insn (SET (out_down,
30197 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30198 out_down)));
30199 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30201 else
30203 /* Shifts by a constant greater than 31. */
30204 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30206 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30207 if (code == ASHIFTRT)
30208 emit_insn (gen_ashrsi3 (out_up, in_up,
30209 GEN_INT (31)));
30210 else
30211 emit_insn (SET (out_up, const0_rtx));
30214 else
30216 /* We have a shift-by-register. */
30217 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30219 /* This alternative requires the scratch registers. */
30220 gcc_assert (scratch1 && REG_P (scratch1));
30221 gcc_assert (scratch2 && REG_P (scratch2));
30223 /* We will need the values "amount-32" and "32-amount" later.
30224 Swapping them around now allows the later code to be more general. */
30225 switch (code)
30227 case ASHIFT:
30228 emit_insn (SUB_32 (scratch1, amount));
30229 emit_insn (RSB_32 (scratch2, amount));
30230 break;
30231 case ASHIFTRT:
30232 emit_insn (RSB_32 (scratch1, amount));
30233 /* Also set CC = amount > 32. */
30234 emit_insn (SUB_S_32 (scratch2, amount));
30235 break;
30236 case LSHIFTRT:
30237 emit_insn (RSB_32 (scratch1, amount));
30238 emit_insn (SUB_32 (scratch2, amount));
30239 break;
30240 default:
30241 gcc_unreachable ();
30244 /* Emit code like this:
30246 arithmetic-left:
30247 out_down = in_down << amount;
30248 out_down = (in_up << (amount - 32)) | out_down;
30249 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30250 out_up = in_up << amount;
30252 arithmetic-right:
30253 out_down = in_down >> amount;
30254 out_down = (in_up << (32 - amount)) | out_down;
30255 if (amount >= 32)
30256 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30257 out_up = in_up >> amount;
30259 logical-right:
30260 out_down = in_down >> amount;
30261 out_down = (in_up << (32 - amount)) | out_down;
30262 if (amount >= 32)
30263 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30264 out_up = in_up >> amount;
30266 The ARM and Thumb2 variants are the same but implemented slightly
30267 differently. If this were only called during expand we could just
30268 use the Thumb2 case and let combine do the right thing, but this
30269 can also be called from post-reload splitters. */
30271 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30273 if (!TARGET_THUMB2)
30275 /* Emit code for ARM mode. */
30276 emit_insn (SET (out_down,
30277 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30278 if (code == ASHIFTRT)
30280 rtx done_label = gen_label_rtx ();
30281 emit_jump_insn (BRANCH (LT, done_label));
30282 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30283 out_down)));
30284 emit_label (done_label);
30286 else
30287 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30288 out_down)));
30290 else
30292 /* Emit code for Thumb2 mode.
30293 Thumb2 can't do shift and or in one insn. */
30294 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30295 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30297 if (code == ASHIFTRT)
30299 rtx done_label = gen_label_rtx ();
30300 emit_jump_insn (BRANCH (LT, done_label));
30301 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30302 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30303 emit_label (done_label);
30305 else
30307 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30308 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30312 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30315 #undef SUB_32
30316 #undef RSB_32
30317 #undef SUB_S_32
30318 #undef SET
30319 #undef SHIFT
30320 #undef LSHIFT
30321 #undef REV_LSHIFT
30322 #undef ORR
30323 #undef BRANCH
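/* [Editor's sketch, not part of arm.c] The shift-by-constant case of
   arm_emit_coreregs_64bit_shift modelled on plain 32-bit halves, for a
   logical left shift with 0 < AMOUNT < 32: the "down"-stream word picks up
   bits shifted in from the "up"-stream word, matching the LSHIFT/REV_LSHIFT/
   ORR sequence the function emits. */
#include <stdint.h>
#include <stdio.h>

static uint64_t shift_left_64_via_32 (uint32_t in_low, uint32_t in_high,
                                      unsigned amount)   /* 0 < amount < 32 */
{
  uint32_t out_high = (in_high << amount) | (in_low >> (32 - amount));
  uint32_t out_low = in_low << amount;
  return ((uint64_t) out_high << 32) | out_low;
}

int main (void)
{
  uint64_t x = 0x0000000180000001ULL;
  printf ("%llx\n", (unsigned long long)
          shift_left_64_via_32 ((uint32_t) x, (uint32_t) (x >> 32), 4));
  /* Prints 1800000010, i.e. x << 4.  */
  return 0;
}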
30327 /* Return true if COMPARISON is a valid comparison operation, and
30328 force the operands into a form that is valid for it. */
30329 bool
30330 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30332 enum rtx_code code = GET_CODE (*comparison);
30333 int code_int;
30334 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30335 ? GET_MODE (*op2) : GET_MODE (*op1);
30337 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30339 if (code == UNEQ || code == LTGT)
30340 return false;
30342 code_int = (int)code;
30343 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30344 PUT_CODE (*comparison, (enum rtx_code)code_int);
30346 switch (mode)
30348 case SImode:
30349 if (!arm_add_operand (*op1, mode))
30350 *op1 = force_reg (mode, *op1);
30351 if (!arm_add_operand (*op2, mode))
30352 *op2 = force_reg (mode, *op2);
30353 return true;
30355 case DImode:
30356 if (!cmpdi_operand (*op1, mode))
30357 *op1 = force_reg (mode, *op1);
30358 if (!cmpdi_operand (*op2, mode))
30359 *op2 = force_reg (mode, *op2);
30360 return true;
30362 case SFmode:
30363 case DFmode:
30364 if (!arm_float_compare_operand (*op1, mode))
30365 *op1 = force_reg (mode, *op1);
30366 if (!arm_float_compare_operand (*op2, mode))
30367 *op2 = force_reg (mode, *op2);
30368 return true;
30369 default:
30370 break;
30373 return false;
30377 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30379 static unsigned HOST_WIDE_INT
30380 arm_asan_shadow_offset (void)
30382 return (unsigned HOST_WIDE_INT) 1 << 29;
30385 #include "gt-arm.h"