1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "obstack.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "reload.h"
38 #include "function.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "diagnostic-core.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "ggc.h"
45 #include "except.h"
46 #include "tm_p.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "debug.h"
50 #include "langhooks.h"
51 #include "df.h"
52 #include "intl.h"
53 #include "libfuncs.h"
54 #include "params.h"
55 #include "opts.h"
56 #include "dumpfile.h"
58 /* Forward definitions of types. */
59 typedef struct minipool_node Mnode;
60 typedef struct minipool_fixup Mfix;
62 void (*arm_lang_output_object_attributes_hook)(void);
64 struct four_ints
66 int i[4];
69 /* Forward function declarations. */
70 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
71 static int arm_compute_static_chain_stack_bytes (void);
72 static arm_stack_offsets *arm_get_frame_offsets (void);
73 static void arm_add_gc_roots (void);
74 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
75 HOST_WIDE_INT, rtx, rtx, int, int);
76 static unsigned bit_count (unsigned long);
77 static int arm_address_register_rtx_p (rtx, int);
78 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
79 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
80 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
81 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
82 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
83 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
84 inline static int thumb1_index_register_rtx_p (rtx, int);
85 static int thumb_far_jump_used_p (void);
86 static bool thumb_force_lr_save (void);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
133 static int optimal_immediate_sequence (enum rtx_code code,
134 unsigned HOST_WIDE_INT val,
135 struct four_ints *return_sequence);
136 static int optimal_immediate_sequence_1 (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence,
139 int i);
140 static int arm_get_strip_length (int);
141 static bool arm_function_ok_for_sibcall (tree, tree);
142 static enum machine_mode arm_promote_function_mode (const_tree,
143 enum machine_mode, int *,
144 const_tree, int);
145 static bool arm_return_in_memory (const_tree, const_tree);
146 static rtx arm_function_value (const_tree, const_tree, bool);
147 static rtx arm_libcall_value_1 (enum machine_mode);
148 static rtx arm_libcall_value (enum machine_mode, const_rtx);
149 static bool arm_function_value_regno_p (const unsigned int);
150 static void arm_internal_label (FILE *, const char *, unsigned long);
151 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
152 tree);
153 static bool arm_have_conditional_execution (void);
154 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
155 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
156 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
157 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
158 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
159 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
160 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
163 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
164 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
165 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
166 static void arm_init_builtins (void);
167 static void arm_init_iwmmxt_builtins (void);
168 static rtx safe_vector_operand (rtx, enum machine_mode);
169 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
170 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
171 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
172 static tree arm_builtin_decl (unsigned, bool);
173 static void emit_constant_insn (rtx cond, rtx pattern);
174 static rtx emit_set_insn (rtx, rtx);
175 static rtx emit_multi_reg_push (unsigned long);
176 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
177 tree, bool);
178 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
179 const_tree, bool);
180 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
181 const_tree, bool);
182 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
183 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
184 const_tree);
185 static rtx aapcs_libcall_value (enum machine_mode);
186 static int aapcs_select_return_coproc (const_tree, const_tree);
188 #ifdef OBJECT_FORMAT_ELF
189 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
190 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
191 #endif
192 #ifndef ARM_PE
193 static void arm_encode_section_info (tree, rtx, int);
194 #endif
196 static void arm_file_end (void);
197 static void arm_file_start (void);
199 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
200 tree, int *, int);
201 static bool arm_pass_by_reference (cumulative_args_t,
202 enum machine_mode, const_tree, bool);
203 static bool arm_promote_prototypes (const_tree);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree);
207 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
208 static bool arm_return_in_memory (const_tree, const_tree);
209 #if ARM_UNWIND_INFO
210 static void arm_unwind_emit (FILE *, rtx);
211 static bool arm_output_ttype (rtx);
212 static void arm_asm_emit_except_personality (rtx);
213 static void arm_asm_init_sections (void);
214 #endif
215 static rtx arm_dwarf_register_span (rtx);
217 static tree arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree arm_get_cookie_size (tree);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree, rtx);
229 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
230 static void arm_option_override (void);
231 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
232 static bool arm_cannot_copy_insn_p (rtx);
233 static int arm_issue_rate (void);
234 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
235 static bool arm_output_addr_const_extra (FILE *, rtx);
236 static bool arm_allocate_stack_slots_for_args (void);
237 static bool arm_warn_func_return (tree);
238 static const char *arm_invalid_parameter_type (const_tree t);
239 static const char *arm_invalid_return_type (const_tree t);
240 static tree arm_promoted_type (const_tree t);
241 static tree arm_convert_to_type (tree type, tree expr);
242 static bool arm_scalar_mode_supported_p (enum machine_mode);
243 static bool arm_frame_pointer_required (void);
244 static bool arm_can_eliminate (const int, const int);
245 static void arm_asm_trampoline_template (FILE *);
246 static void arm_trampoline_init (rtx, tree, rtx);
247 static rtx arm_trampoline_adjust_address (rtx);
248 static rtx arm_pic_static_addr (rtx orig, rtx reg);
249 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
250 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
251 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool arm_array_mode_supported_p (enum machine_mode,
253 unsigned HOST_WIDE_INT);
254 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
255 static bool arm_class_likely_spilled_p (reg_class_t);
256 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
257 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
258 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
259 const_tree type,
260 int misalignment,
261 bool is_packed);
262 static void arm_conditional_register_usage (void);
263 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
264 static unsigned int arm_autovectorize_vector_sizes (void);
265 static int arm_default_branch_cost (bool, bool);
266 static int arm_cortex_a5_branch_cost (bool, bool);
268 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
269 const unsigned char *sel);
271 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
272 tree vectype,
273 int misalign ATTRIBUTE_UNUSED);
274 static unsigned arm_add_stmt_cost (void *data, int count,
275 enum vect_cost_for_stmt kind,
276 struct _stmt_vec_info *stmt_info,
277 int misalign,
278 enum vect_cost_model_location where);
280 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
281 bool op0_preserve_value);
282 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
284 /* Table of machine attributes. */
285 static const struct attribute_spec arm_attribute_table[] =
287 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
288 affects_type_identity } */
289 /* Function calls made to this symbol must be done indirectly, because
290 it may lie outside of the 26 bit addressing range of a normal function
291 call. */
292 { "long_call", 0, 0, false, true, true, NULL, false },
293 /* Whereas these functions are always known to reside within the 26 bit
294 addressing range. */
295 { "short_call", 0, 0, false, true, true, NULL, false },
296 /* Specify the procedure call conventions for a function. */
297 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
298 false },
299 /* Interrupt Service Routines have special prologue and epilogue requirements. */
300 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
301 false },
302 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
303 false },
304 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
305 false },
306 #ifdef ARM_PE
307 /* ARM/PE has three new attributes:
308 interfacearm - ?
309 dllexport - for exporting a function/variable that will live in a dll
310 dllimport - for importing a function/variable from a dll
312 Microsoft allows multiple declspecs in one __declspec, separating
313 them with spaces. We do NOT support this. Instead, use __declspec
314 multiple times.
316 { "dllimport", 0, 0, true, false, false, NULL, false },
317 { "dllexport", 0, 0, true, false, false, NULL, false },
318 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
319 false },
320 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
322 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
323 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
324 false },
325 #endif
326 { NULL, 0, 0, false, false, false, NULL, false }
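/* As an illustration only (the function names below are hypothetical,
   not part of this file), the attributes in the table above appear in
   user code roughly as follows:

     extern void far_away_func (void) __attribute__ ((long_call));
     extern void nearby_func (void)   __attribute__ ((short_call));
     void irq_handler (void)          __attribute__ ((interrupt ("IRQ")));
     void raw_stub (void)             __attribute__ ((naked));

   "long_call" forces an indirect call sequence, "short_call" permits a
   direct BL, "isr"/"interrupt" select the ISR prologue/epilogue handled
   via arm_handle_isr_attribute, and "naked" suppresses prologue and
   epilogue generation entirely.  */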
329 /* Initialize the GCC target structure. */
330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 #undef TARGET_MERGE_DECL_ATTRIBUTES
332 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
333 #endif
335 #undef TARGET_LEGITIMIZE_ADDRESS
336 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
338 #undef TARGET_ATTRIBUTE_TABLE
339 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
341 #undef TARGET_ASM_FILE_START
342 #define TARGET_ASM_FILE_START arm_file_start
343 #undef TARGET_ASM_FILE_END
344 #define TARGET_ASM_FILE_END arm_file_end
346 #undef TARGET_ASM_ALIGNED_SI_OP
347 #define TARGET_ASM_ALIGNED_SI_OP NULL
348 #undef TARGET_ASM_INTEGER
349 #define TARGET_ASM_INTEGER arm_assemble_integer
351 #undef TARGET_PRINT_OPERAND
352 #define TARGET_PRINT_OPERAND arm_print_operand
353 #undef TARGET_PRINT_OPERAND_ADDRESS
354 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
355 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
356 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
358 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
359 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
361 #undef TARGET_ASM_FUNCTION_PROLOGUE
362 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
364 #undef TARGET_ASM_FUNCTION_EPILOGUE
365 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
367 #undef TARGET_OPTION_OVERRIDE
368 #define TARGET_OPTION_OVERRIDE arm_option_override
370 #undef TARGET_COMP_TYPE_ATTRIBUTES
371 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
373 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
374 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
376 #undef TARGET_SCHED_ADJUST_COST
377 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
379 #undef TARGET_SCHED_REORDER
380 #define TARGET_SCHED_REORDER arm_sched_reorder
382 #undef TARGET_REGISTER_MOVE_COST
383 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
385 #undef TARGET_MEMORY_MOVE_COST
386 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
388 #undef TARGET_ENCODE_SECTION_INFO
389 #ifdef ARM_PE
390 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
391 #else
392 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
393 #endif
395 #undef TARGET_STRIP_NAME_ENCODING
396 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
398 #undef TARGET_ASM_INTERNAL_LABEL
399 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
401 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
402 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
404 #undef TARGET_FUNCTION_VALUE
405 #define TARGET_FUNCTION_VALUE arm_function_value
407 #undef TARGET_LIBCALL_VALUE
408 #define TARGET_LIBCALL_VALUE arm_libcall_value
410 #undef TARGET_FUNCTION_VALUE_REGNO_P
411 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
413 #undef TARGET_ASM_OUTPUT_MI_THUNK
414 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
415 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
416 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
418 #undef TARGET_RTX_COSTS
419 #define TARGET_RTX_COSTS arm_rtx_costs
420 #undef TARGET_ADDRESS_COST
421 #define TARGET_ADDRESS_COST arm_address_cost
423 #undef TARGET_SHIFT_TRUNCATION_MASK
424 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
425 #undef TARGET_VECTOR_MODE_SUPPORTED_P
426 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
427 #undef TARGET_ARRAY_MODE_SUPPORTED_P
428 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
429 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
430 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
431 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
432 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
433 arm_autovectorize_vector_sizes
435 #undef TARGET_MACHINE_DEPENDENT_REORG
436 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
438 #undef TARGET_INIT_BUILTINS
439 #define TARGET_INIT_BUILTINS arm_init_builtins
440 #undef TARGET_EXPAND_BUILTIN
441 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
442 #undef TARGET_BUILTIN_DECL
443 #define TARGET_BUILTIN_DECL arm_builtin_decl
445 #undef TARGET_INIT_LIBFUNCS
446 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
448 #undef TARGET_PROMOTE_FUNCTION_MODE
449 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
450 #undef TARGET_PROMOTE_PROTOTYPES
451 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
452 #undef TARGET_PASS_BY_REFERENCE
453 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
454 #undef TARGET_ARG_PARTIAL_BYTES
455 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
456 #undef TARGET_FUNCTION_ARG
457 #define TARGET_FUNCTION_ARG arm_function_arg
458 #undef TARGET_FUNCTION_ARG_ADVANCE
459 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
460 #undef TARGET_FUNCTION_ARG_BOUNDARY
461 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
463 #undef TARGET_SETUP_INCOMING_VARARGS
464 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
466 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
467 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
469 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
470 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
471 #undef TARGET_TRAMPOLINE_INIT
472 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
473 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
474 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
476 #undef TARGET_WARN_FUNC_RETURN
477 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
479 #undef TARGET_DEFAULT_SHORT_ENUMS
480 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
482 #undef TARGET_ALIGN_ANON_BITFIELD
483 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
485 #undef TARGET_NARROW_VOLATILE_BITFIELD
486 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
488 #undef TARGET_CXX_GUARD_TYPE
489 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
491 #undef TARGET_CXX_GUARD_MASK_BIT
492 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
494 #undef TARGET_CXX_GET_COOKIE_SIZE
495 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
497 #undef TARGET_CXX_COOKIE_HAS_SIZE
498 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
500 #undef TARGET_CXX_CDTOR_RETURNS_THIS
501 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
503 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
504 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
506 #undef TARGET_CXX_USE_AEABI_ATEXIT
507 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
509 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
510 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
511 arm_cxx_determine_class_data_visibility
513 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
514 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
516 #undef TARGET_RETURN_IN_MSB
517 #define TARGET_RETURN_IN_MSB arm_return_in_msb
519 #undef TARGET_RETURN_IN_MEMORY
520 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
522 #undef TARGET_MUST_PASS_IN_STACK
523 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
525 #if ARM_UNWIND_INFO
526 #undef TARGET_ASM_UNWIND_EMIT
527 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
529 /* EABI unwinding tables use a different format for the typeinfo tables. */
530 #undef TARGET_ASM_TTYPE
531 #define TARGET_ASM_TTYPE arm_output_ttype
533 #undef TARGET_ARM_EABI_UNWINDER
534 #define TARGET_ARM_EABI_UNWINDER true
536 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
537 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
539 #undef TARGET_ASM_INIT_SECTIONS
540 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
541 #endif /* ARM_UNWIND_INFO */
543 #undef TARGET_DWARF_REGISTER_SPAN
544 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
546 #undef TARGET_CANNOT_COPY_INSN_P
547 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
549 #ifdef HAVE_AS_TLS
550 #undef TARGET_HAVE_TLS
551 #define TARGET_HAVE_TLS true
552 #endif
554 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
555 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
557 #undef TARGET_LEGITIMATE_CONSTANT_P
558 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
560 #undef TARGET_CANNOT_FORCE_CONST_MEM
561 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
563 #undef TARGET_MAX_ANCHOR_OFFSET
564 #define TARGET_MAX_ANCHOR_OFFSET 4095
566 /* The minimum is set such that the total size of the block
567 for a particular anchor is -4088 + 1 + 4095 bytes, which is
568 divisible by eight, ensuring natural spacing of anchors. */
569 #undef TARGET_MIN_ANCHOR_OFFSET
570 #define TARGET_MIN_ANCHOR_OFFSET -4088
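/* In other words, anchored offsets span 4088 + 1 + 4095 = 8184 bytes
   (1023 doublewords); since 8184 is divisible by eight, doubleword data
   keeps its natural alignment relative to any anchor.  */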
572 #undef TARGET_SCHED_ISSUE_RATE
573 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
575 #undef TARGET_MANGLE_TYPE
576 #define TARGET_MANGLE_TYPE arm_mangle_type
578 #undef TARGET_BUILD_BUILTIN_VA_LIST
579 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
580 #undef TARGET_EXPAND_BUILTIN_VA_START
581 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
582 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
583 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
585 #ifdef HAVE_AS_TLS
586 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
587 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
588 #endif
590 #undef TARGET_LEGITIMATE_ADDRESS_P
591 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
593 #undef TARGET_PREFERRED_RELOAD_CLASS
594 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
596 #undef TARGET_INVALID_PARAMETER_TYPE
597 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
599 #undef TARGET_INVALID_RETURN_TYPE
600 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
602 #undef TARGET_PROMOTED_TYPE
603 #define TARGET_PROMOTED_TYPE arm_promoted_type
605 #undef TARGET_CONVERT_TO_TYPE
606 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
608 #undef TARGET_SCALAR_MODE_SUPPORTED_P
609 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
611 #undef TARGET_FRAME_POINTER_REQUIRED
612 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
614 #undef TARGET_CAN_ELIMINATE
615 #define TARGET_CAN_ELIMINATE arm_can_eliminate
617 #undef TARGET_CONDITIONAL_REGISTER_USAGE
618 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
620 #undef TARGET_CLASS_LIKELY_SPILLED_P
621 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
623 #undef TARGET_VECTORIZE_BUILTINS
624 #define TARGET_VECTORIZE_BUILTINS
626 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
627 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
628 arm_builtin_vectorized_function
630 #undef TARGET_VECTOR_ALIGNMENT
631 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
633 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
634 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
635 arm_vector_alignment_reachable
637 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
638 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
639 arm_builtin_support_vector_misalignment
641 #undef TARGET_PREFERRED_RENAME_CLASS
642 #define TARGET_PREFERRED_RENAME_CLASS \
643 arm_preferred_rename_class
645 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
646 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
647 arm_vectorize_vec_perm_const_ok
649 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
650 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
651 arm_builtin_vectorization_cost
652 #undef TARGET_VECTORIZE_ADD_STMT_COST
653 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
655 #undef TARGET_CANONICALIZE_COMPARISON
656 #define TARGET_CANONICALIZE_COMPARISON \
657 arm_canonicalize_comparison
659 #undef TARGET_ASAN_SHADOW_OFFSET
660 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
662 #undef MAX_INSN_PER_IT_BLOCK
663 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
666 struct gcc_target targetm = TARGET_INITIALIZER;
668 /* Obstack for minipool constant handling. */
669 static struct obstack minipool_obstack;
670 static char * minipool_startobj;
672 /* The maximum number of insns skipped which
673 will be conditionalised if possible. */
674 static int max_insns_skipped = 5;
676 extern FILE * asm_out_file;
678 /* True if we are currently building a constant table. */
679 int making_const_table;
681 /* The processor for which instructions should be scheduled. */
682 enum processor_type arm_tune = arm_none;
684 /* The current tuning set. */
685 const struct tune_params *current_tune;
687 /* Which floating point hardware to schedule for. */
688 int arm_fpu_attr;
690 /* Which floating point hardware to use. */
691 const struct arm_fpu_desc *arm_fpu_desc;
693 /* Used for Thumb call_via trampolines. */
694 rtx thumb_call_via_label[14];
695 static int thumb_call_reg_needed;
697 /* Bit values used to identify processor capabilities. */
698 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
699 #define FL_ARCH3M (1 << 1) /* Extended multiply */
700 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
701 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
702 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
703 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
704 #define FL_THUMB (1 << 6) /* Thumb aware */
705 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
706 #define FL_STRONG (1 << 8) /* StrongARM */
707 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
708 #define FL_XSCALE (1 << 10) /* XScale */
709 /* spare (1 << 11) */
710 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
711 media instructions. */
712 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
713 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
714 Note: ARM6 & 7 derivatives only. */
715 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
716 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
717 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
718 profile. */
719 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
720 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
721 #define FL_NEON (1 << 20) /* Neon instructions. */
722 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
723 architecture. */
724 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
725 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
726 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
727 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
729 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
730 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
732 /* Flags that only affect tuning, not the available instructions. */
733 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
734 | FL_CO_PROC)
736 #define FL_FOR_ARCH2 FL_NOTM
737 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
738 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
739 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
740 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
741 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
742 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
743 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
744 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
745 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
746 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
747 #define FL_FOR_ARCH6J FL_FOR_ARCH6
748 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
749 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
750 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
751 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
752 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
753 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
754 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
755 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
756 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
757 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
758 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
759 | FL_ARM_DIV | FL_NOTM)
761 /* The bits in this mask specify which
762 instructions we are allowed to generate. */
763 static unsigned long insn_flags = 0;
765 /* The bits in this mask specify which instruction scheduling options should
766 be used. */
767 static unsigned long tune_flags = 0;
769 /* The highest ARM architecture version supported by the
770 target. */
771 enum base_architecture arm_base_arch = BASE_ARCH_0;
773 /* The following are used in the arm.md file as equivalents to bits
774 in the above two flag variables. */
776 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
777 int arm_arch3m = 0;
779 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
780 int arm_arch4 = 0;
782 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
783 int arm_arch4t = 0;
785 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
786 int arm_arch5 = 0;
788 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
789 int arm_arch5e = 0;
791 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
792 int arm_arch6 = 0;
794 /* Nonzero if this chip supports the ARM 6K extensions. */
795 int arm_arch6k = 0;
797 /* Nonzero if instructions present in ARMv6-M can be used. */
798 int arm_arch6m = 0;
800 /* Nonzero if this chip supports the ARM 7 extensions. */
801 int arm_arch7 = 0;
803 /* Nonzero if instructions not present in the 'M' profile can be used. */
804 int arm_arch_notm = 0;
806 /* Nonzero if instructions present in ARMv7E-M can be used. */
807 int arm_arch7em = 0;
809 /* Nonzero if instructions present in ARMv8 can be used. */
810 int arm_arch8 = 0;
812 /* Nonzero if this chip can benefit from load scheduling. */
813 int arm_ld_sched = 0;
815 /* Nonzero if this chip is a StrongARM. */
816 int arm_tune_strongarm = 0;
818 /* Nonzero if this chip supports Intel Wireless MMX technology. */
819 int arm_arch_iwmmxt = 0;
821 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
822 int arm_arch_iwmmxt2 = 0;
824 /* Nonzero if this chip is an XScale. */
825 int arm_arch_xscale = 0;
827 /* Nonzero if tuning for XScale */
828 int arm_tune_xscale = 0;
830 /* Nonzero if we want to tune for stores that access the write-buffer.
831 This typically means an ARM6 or ARM7 with MMU or MPU. */
832 int arm_tune_wbuf = 0;
834 /* Nonzero if tuning for Cortex-A9. */
835 int arm_tune_cortex_a9 = 0;
837 /* Nonzero if generating Thumb instructions. */
838 int thumb_code = 0;
840 /* Nonzero if generating Thumb-1 instructions. */
841 int thumb1_code = 0;
843 /* Nonzero if we should define __THUMB_INTERWORK__ in the
844 preprocessor.
845 XXX This is a bit of a hack, it's intended to help work around
846 problems in GLD which doesn't understand that armv5t code is
847 interworking clean. */
848 int arm_cpp_interwork = 0;
850 /* Nonzero if chip supports Thumb 2. */
851 int arm_arch_thumb2;
853 /* Nonzero if chip supports integer division instruction. */
854 int arm_arch_arm_hwdiv;
855 int arm_arch_thumb_hwdiv;
857 /* Nonzero if we should use Neon to handle 64-bits operations rather
858 than core registers. */
859 int prefer_neon_for_64bits = 0;
861 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
862 we must report the mode of the memory reference from
863 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
864 enum machine_mode output_memory_reference_mode;
866 /* The register number to be used for the PIC offset register. */
867 unsigned arm_pic_register = INVALID_REGNUM;
869 /* Set to 1 after arm_reorg has started. Reset to start at the start of
870 the next function. */
871 static int after_arm_reorg = 0;
873 enum arm_pcs arm_pcs_default;
875 /* For an explanation of these variables, see final_prescan_insn below. */
876 int arm_ccfsm_state;
877 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
878 enum arm_cond_code arm_current_cc;
880 rtx arm_target_insn;
881 int arm_target_label;
882 /* The number of conditionally executed insns, including the current insn. */
883 int arm_condexec_count = 0;
884 /* A bitmask specifying the patterns for the IT block.
885 Zero means do not output an IT block before this insn. */
886 int arm_condexec_mask = 0;
887 /* The number of bits used in arm_condexec_mask. */
888 int arm_condexec_masklen = 0;
890 /* Nonzero if chip supports the ARMv8 CRC instructions. */
891 int arm_arch_crc = 0;
893 /* The condition codes of the ARM, and the inverse function. */
894 static const char * const arm_condition_codes[] =
896 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
897 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
900 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
901 int arm_regs_in_sequence[] =
903 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
906 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
907 #define streq(string1, string2) (strcmp (string1, string2) == 0)
909 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
910 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
911 | (1 << PIC_OFFSET_TABLE_REGNUM)))
913 /* Initialization code. */
915 struct processors
917 const char *const name;
918 enum processor_type core;
919 const char *arch;
920 enum base_architecture base_arch;
921 const unsigned long flags;
922 const struct tune_params *const tune;
926 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
927 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
928 prefetch_slots, \
929 l1_size, \
930 l1_line_size
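/* For example, ARM_PREFETCH_BENEFICIAL (4, 32, 32) -- as used by
   arm_cortex_a9_tune below -- simply expands to "4, 32, 32", i.e. the
   prefetch-slot count, L1 cache size and L1 line size fields of the
   tune_params initializer, while ARM_PREFETCH_NOT_BENEFICIAL supplies
   "0, -1, -1" to disable prefetch tuning.  */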
932 /* arm generic vectorizer costs. */
933 static const
934 struct cpu_vec_costs arm_default_vec_cost = {
935 1, /* scalar_stmt_cost. */
936 1, /* scalar load_cost. */
937 1, /* scalar_store_cost. */
938 1, /* vec_stmt_cost. */
939 1, /* vec_to_scalar_cost. */
940 1, /* scalar_to_vec_cost. */
941 1, /* vec_align_load_cost. */
942 1, /* vec_unalign_load_cost. */
943 1, /* vec_unalign_store_cost. */
944 1, /* vec_store_cost. */
945 3, /* cond_taken_branch_cost. */
946 1, /* cond_not_taken_branch_cost. */
949 const struct tune_params arm_slowmul_tune =
951 arm_slowmul_rtx_costs,
952 NULL,
953 3, /* Constant limit. */
954 5, /* Max cond insns. */
955 ARM_PREFETCH_NOT_BENEFICIAL,
956 true, /* Prefer constant pool. */
957 arm_default_branch_cost,
958 false, /* Prefer LDRD/STRD. */
959 {true, true}, /* Prefer non short circuit. */
960 &arm_default_vec_cost, /* Vectorizer costs. */
961 false /* Prefer Neon for 64-bits bitops. */
964 const struct tune_params arm_fastmul_tune =
966 arm_fastmul_rtx_costs,
967 NULL,
968 1, /* Constant limit. */
969 5, /* Max cond insns. */
970 ARM_PREFETCH_NOT_BENEFICIAL,
971 true, /* Prefer constant pool. */
972 arm_default_branch_cost,
973 false, /* Prefer LDRD/STRD. */
974 {true, true}, /* Prefer non short circuit. */
975 &arm_default_vec_cost, /* Vectorizer costs. */
976 false /* Prefer Neon for 64-bits bitops. */
979 /* StrongARM has early execution of branches, so a sequence that is worth
980 skipping is shorter. Set max_insns_skipped to a lower value. */
982 const struct tune_params arm_strongarm_tune =
984 arm_fastmul_rtx_costs,
985 NULL,
986 1, /* Constant limit. */
987 3, /* Max cond insns. */
988 ARM_PREFETCH_NOT_BENEFICIAL,
989 true, /* Prefer constant pool. */
990 arm_default_branch_cost,
991 false, /* Prefer LDRD/STRD. */
992 {true, true}, /* Prefer non short circuit. */
993 &arm_default_vec_cost, /* Vectorizer costs. */
994 false /* Prefer Neon for 64-bits bitops. */
997 const struct tune_params arm_xscale_tune =
999 arm_xscale_rtx_costs,
1000 xscale_sched_adjust_cost,
1001 2, /* Constant limit. */
1002 3, /* Max cond insns. */
1003 ARM_PREFETCH_NOT_BENEFICIAL,
1004 true, /* Prefer constant pool. */
1005 arm_default_branch_cost,
1006 false, /* Prefer LDRD/STRD. */
1007 {true, true}, /* Prefer non short circuit. */
1008 &arm_default_vec_cost, /* Vectorizer costs. */
1009 false /* Prefer Neon for 64-bits bitops. */
1012 const struct tune_params arm_9e_tune =
1014 arm_9e_rtx_costs,
1015 NULL,
1016 1, /* Constant limit. */
1017 5, /* Max cond insns. */
1018 ARM_PREFETCH_NOT_BENEFICIAL,
1019 true, /* Prefer constant pool. */
1020 arm_default_branch_cost,
1021 false, /* Prefer LDRD/STRD. */
1022 {true, true}, /* Prefer non short circuit. */
1023 &arm_default_vec_cost, /* Vectorizer costs. */
1024 false /* Prefer Neon for 64-bits bitops. */
1027 const struct tune_params arm_v6t2_tune =
1029 arm_9e_rtx_costs,
1030 NULL,
1031 1, /* Constant limit. */
1032 5, /* Max cond insns. */
1033 ARM_PREFETCH_NOT_BENEFICIAL,
1034 false, /* Prefer constant pool. */
1035 arm_default_branch_cost,
1036 false, /* Prefer LDRD/STRD. */
1037 {true, true}, /* Prefer non short circuit. */
1038 &arm_default_vec_cost, /* Vectorizer costs. */
1039 false /* Prefer Neon for 64-bits bitops. */
1042 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1043 const struct tune_params arm_cortex_tune =
1045 arm_9e_rtx_costs,
1046 NULL,
1047 1, /* Constant limit. */
1048 5, /* Max cond insns. */
1049 ARM_PREFETCH_NOT_BENEFICIAL,
1050 false, /* Prefer constant pool. */
1051 arm_default_branch_cost,
1052 false, /* Prefer LDRD/STRD. */
1053 {true, true}, /* Prefer non short circuit. */
1054 &arm_default_vec_cost, /* Vectorizer costs. */
1055 false /* Prefer Neon for 64-bits bitops. */
1058 const struct tune_params arm_cortex_a15_tune =
1060 arm_9e_rtx_costs,
1061 NULL,
1062 1, /* Constant limit. */
1063 2, /* Max cond insns. */
1064 ARM_PREFETCH_NOT_BENEFICIAL,
1065 false, /* Prefer constant pool. */
1066 arm_default_branch_cost,
1067 true, /* Prefer LDRD/STRD. */
1068 {true, true}, /* Prefer non short circuit. */
1069 &arm_default_vec_cost, /* Vectorizer costs. */
1070 false /* Prefer Neon for 64-bits bitops. */
1073 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1074 less appealing. Set max_insns_skipped to a low value. */
1076 const struct tune_params arm_cortex_a5_tune =
1078 arm_9e_rtx_costs,
1079 NULL,
1080 1, /* Constant limit. */
1081 1, /* Max cond insns. */
1082 ARM_PREFETCH_NOT_BENEFICIAL,
1083 false, /* Prefer constant pool. */
1084 arm_cortex_a5_branch_cost,
1085 false, /* Prefer LDRD/STRD. */
1086 {false, false}, /* Prefer non short circuit. */
1087 &arm_default_vec_cost, /* Vectorizer costs. */
1088 false /* Prefer Neon for 64-bits bitops. */
1091 const struct tune_params arm_cortex_a9_tune =
1093 arm_9e_rtx_costs,
1094 cortex_a9_sched_adjust_cost,
1095 1, /* Constant limit. */
1096 5, /* Max cond insns. */
1097 ARM_PREFETCH_BENEFICIAL(4,32,32),
1098 false, /* Prefer constant pool. */
1099 arm_default_branch_cost,
1100 false, /* Prefer LDRD/STRD. */
1101 {true, true}, /* Prefer non short circuit. */
1102 &arm_default_vec_cost, /* Vectorizer costs. */
1103 false /* Prefer Neon for 64-bits bitops. */
1106 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1107 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1108 const struct tune_params arm_v6m_tune =
1110 arm_9e_rtx_costs,
1111 NULL,
1112 1, /* Constant limit. */
1113 5, /* Max cond insns. */
1114 ARM_PREFETCH_NOT_BENEFICIAL,
1115 false, /* Prefer constant pool. */
1116 arm_default_branch_cost,
1117 false, /* Prefer LDRD/STRD. */
1118 {false, false}, /* Prefer non short circuit. */
1119 &arm_default_vec_cost, /* Vectorizer costs. */
1120 false /* Prefer Neon for 64-bits bitops. */
1123 const struct tune_params arm_fa726te_tune =
1125 arm_9e_rtx_costs,
1126 fa726te_sched_adjust_cost,
1127 1, /* Constant limit. */
1128 5, /* Max cond insns. */
1129 ARM_PREFETCH_NOT_BENEFICIAL,
1130 true, /* Prefer constant pool. */
1131 arm_default_branch_cost,
1132 false, /* Prefer LDRD/STRD. */
1133 {true, true}, /* Prefer non short circuit. */
1134 &arm_default_vec_cost, /* Vectorizer costs. */
1135 false /* Prefer Neon for 64-bits bitops. */
1139 /* Not all of these give usefully different compilation alternatives,
1140 but there is no simple way of generalizing them. */
1141 static const struct processors all_cores[] =
1143 /* ARM Cores */
1144 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1145 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1146 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1147 #include "arm-cores.def"
1148 #undef ARM_CORE
1149 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
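/* As a sketch of the expansion above: a hypothetical arm-cores.def entry

     ARM_CORE("example-core", examplecore, 7A, FL_LDSCHED, cortex)

   would produce the initializer

     {"example-core", examplecore, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune},

   i.e. the core's own flags are OR-ed with the baseline flags of its
   architecture and paired with the named tuning structure.  */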
1152 static const struct processors all_architectures[] =
1154 /* ARM Architectures */
1155 /* We don't specify tuning costs here as it will be figured out
1156 from the core. */
1158 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1159 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1160 #include "arm-arches.def"
1161 #undef ARM_ARCH
1162 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1166 /* These are populated as commandline arguments are processed, or NULL
1167 if not specified. */
1168 static const struct processors *arm_selected_arch;
1169 static const struct processors *arm_selected_cpu;
1170 static const struct processors *arm_selected_tune;
1172 /* The name of the preprocessor macro to define for this architecture. */
1174 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1176 /* Available values for -mfpu=. */
1178 static const struct arm_fpu_desc all_fpus[] =
1180 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1181 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1182 #include "arm-fpus.def"
1183 #undef ARM_FPU
1187 /* Supported TLS relocations. */
1189 enum tls_reloc {
1190 TLS_GD32,
1191 TLS_LDM32,
1192 TLS_LDO32,
1193 TLS_IE32,
1194 TLS_LE32,
1195 TLS_DESCSEQ /* GNU scheme */
1198 /* The maximum number of insns to be used when loading a constant. */
1199 inline static int
1200 arm_constant_limit (bool size_p)
1202 return size_p ? 1 : current_tune->constant_limit;
1205 /* Emit an insn that's a simple single-set. Both the operands must be known
1206 to be valid. */
1207 inline static rtx
1208 emit_set_insn (rtx x, rtx y)
1210 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1213 /* Return the number of bits set in VALUE. */
1214 static unsigned
1215 bit_count (unsigned long value)
1217 unsigned long count = 0;
1219 while (value)
1221 count++;
1222 value &= value - 1; /* Clear the least-significant set bit. */
1225 return count;
1228 typedef struct
1230 enum machine_mode mode;
1231 const char *name;
1232 } arm_fixed_mode_set;
1234 /* A small helper for setting fixed-point library libfuncs. */
1236 static void
1237 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1238 const char *funcname, const char *modename,
1239 int num_suffix)
1241 char buffer[50];
1243 if (num_suffix == 0)
1244 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1245 else
1246 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1248 set_optab_libfunc (optable, mode, buffer);
1251 static void
1252 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1253 enum machine_mode from, const char *funcname,
1254 const char *toname, const char *fromname)
1256 char buffer[50];
1257 const char *maybe_suffix_2 = "";
1259 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1260 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1261 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1262 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1263 maybe_suffix_2 = "2";
1265 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1266 maybe_suffix_2);
1268 set_conv_libfunc (optable, to, from, buffer);
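/* By way of example (these particular calls are illustrative, not made
   verbatim below):

     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)

   registers "__gnu_addsq3" for SQmode addition, while

     arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
                                 "fract", "hq", "qq")

   registers "__gnu_fractqqhq2" -- the "2" suffix is appended because
   both modes are fract modes with the same signedness, mirroring the
   naming convention of fixed-bit.h.  */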
1271 /* Set up library functions unique to ARM. */
1273 static void
1274 arm_init_libfuncs (void)
1276 /* For Linux, we have access to kernel support for atomic operations. */
1277 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1278 init_sync_libfuncs (2 * UNITS_PER_WORD);
1280 /* There are no special library functions unless we are using the
1281 ARM BPABI. */
1282 if (!TARGET_BPABI)
1283 return;
1285 /* The functions below are described in Section 4 of the "Run-Time
1286 ABI for the ARM architecture", Version 1.0. */
1288 /* Double-precision floating-point arithmetic. Table 2. */
1289 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1290 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1291 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1292 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1293 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1295 /* Double-precision comparisons. Table 3. */
1296 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1297 set_optab_libfunc (ne_optab, DFmode, NULL);
1298 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1299 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1300 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1301 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1302 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1304 /* Single-precision floating-point arithmetic. Table 4. */
1305 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1306 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1307 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1308 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1309 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1311 /* Single-precision comparisons. Table 5. */
1312 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1313 set_optab_libfunc (ne_optab, SFmode, NULL);
1314 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1315 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1316 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1317 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1318 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1320 /* Floating-point to integer conversions. Table 6. */
1321 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1322 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1323 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1324 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1325 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1326 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1327 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1328 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1330 /* Conversions between floating types. Table 7. */
1331 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1332 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1334 /* Integer to floating-point conversions. Table 8. */
1335 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1336 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1337 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1338 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1339 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1340 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1341 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1342 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1344 /* Long long. Table 9. */
1345 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1346 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1347 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1348 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1349 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1350 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1351 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1352 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1354 /* Integer (32/32->32) division. \S 4.3.1. */
1355 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1356 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1358 /* The divmod functions are designed so that they can be used for
1359 plain division, even though they return both the quotient and the
1360 remainder. The quotient is returned in the usual location (i.e.,
1361 r0 for SImode, {r0, r1} for DImode), just as would be expected
1362 for an ordinary division routine. Because the AAPCS calling
1363 conventions specify that all of { r0, r1, r2, r3 } are
1364 call-clobbered registers, there is no need to tell the compiler
1365 explicitly that those registers are clobbered by these
1366 routines. */
1367 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1368 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
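/* Concretely, under the run-time ABI __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, while __aeabi_uldivmod
   returns the quotient in {r0, r1} and the remainder in {r2, r3}; a
   caller that only wants the quotient simply ignores the extra result
   registers.  */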
1370 /* For SImode division the ABI provides div-without-mod routines,
1371 which are faster. */
1372 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1373 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1375 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1376 divmod libcalls instead. */
1377 set_optab_libfunc (smod_optab, DImode, NULL);
1378 set_optab_libfunc (umod_optab, DImode, NULL);
1379 set_optab_libfunc (smod_optab, SImode, NULL);
1380 set_optab_libfunc (umod_optab, SImode, NULL);
1382 /* Half-precision float operations. The compiler handles all operations
1383 with NULL libfuncs by converting to SFmode. */
1384 switch (arm_fp16_format)
1386 case ARM_FP16_FORMAT_IEEE:
1387 case ARM_FP16_FORMAT_ALTERNATIVE:
1389 /* Conversions. */
1390 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1391 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1392 ? "__gnu_f2h_ieee"
1393 : "__gnu_f2h_alternative"));
1394 set_conv_libfunc (sext_optab, SFmode, HFmode,
1395 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1396 ? "__gnu_h2f_ieee"
1397 : "__gnu_h2f_alternative"));
1399 /* Arithmetic. */
1400 set_optab_libfunc (add_optab, HFmode, NULL);
1401 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1402 set_optab_libfunc (smul_optab, HFmode, NULL);
1403 set_optab_libfunc (neg_optab, HFmode, NULL);
1404 set_optab_libfunc (sub_optab, HFmode, NULL);
1406 /* Comparisons. */
1407 set_optab_libfunc (eq_optab, HFmode, NULL);
1408 set_optab_libfunc (ne_optab, HFmode, NULL);
1409 set_optab_libfunc (lt_optab, HFmode, NULL);
1410 set_optab_libfunc (le_optab, HFmode, NULL);
1411 set_optab_libfunc (ge_optab, HFmode, NULL);
1412 set_optab_libfunc (gt_optab, HFmode, NULL);
1413 set_optab_libfunc (unord_optab, HFmode, NULL);
1414 break;
1416 default:
1417 break;
1420 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1422 const arm_fixed_mode_set fixed_arith_modes[] =
1424 { QQmode, "qq" },
1425 { UQQmode, "uqq" },
1426 { HQmode, "hq" },
1427 { UHQmode, "uhq" },
1428 { SQmode, "sq" },
1429 { USQmode, "usq" },
1430 { DQmode, "dq" },
1431 { UDQmode, "udq" },
1432 { TQmode, "tq" },
1433 { UTQmode, "utq" },
1434 { HAmode, "ha" },
1435 { UHAmode, "uha" },
1436 { SAmode, "sa" },
1437 { USAmode, "usa" },
1438 { DAmode, "da" },
1439 { UDAmode, "uda" },
1440 { TAmode, "ta" },
1441 { UTAmode, "uta" }
1443 const arm_fixed_mode_set fixed_conv_modes[] =
1445 { QQmode, "qq" },
1446 { UQQmode, "uqq" },
1447 { HQmode, "hq" },
1448 { UHQmode, "uhq" },
1449 { SQmode, "sq" },
1450 { USQmode, "usq" },
1451 { DQmode, "dq" },
1452 { UDQmode, "udq" },
1453 { TQmode, "tq" },
1454 { UTQmode, "utq" },
1455 { HAmode, "ha" },
1456 { UHAmode, "uha" },
1457 { SAmode, "sa" },
1458 { USAmode, "usa" },
1459 { DAmode, "da" },
1460 { UDAmode, "uda" },
1461 { TAmode, "ta" },
1462 { UTAmode, "uta" },
1463 { QImode, "qi" },
1464 { HImode, "hi" },
1465 { SImode, "si" },
1466 { DImode, "di" },
1467 { TImode, "ti" },
1468 { SFmode, "sf" },
1469 { DFmode, "df" }
1471 unsigned int i, j;
1473 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1475 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1476 "add", fixed_arith_modes[i].name, 3);
1477 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1478 "ssadd", fixed_arith_modes[i].name, 3);
1479 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1480 "usadd", fixed_arith_modes[i].name, 3);
1481 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1482 "sub", fixed_arith_modes[i].name, 3);
1483 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1484 "sssub", fixed_arith_modes[i].name, 3);
1485 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1486 "ussub", fixed_arith_modes[i].name, 3);
1487 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1488 "mul", fixed_arith_modes[i].name, 3);
1489 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1490 "ssmul", fixed_arith_modes[i].name, 3);
1491 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1492 "usmul", fixed_arith_modes[i].name, 3);
1493 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1494 "div", fixed_arith_modes[i].name, 3);
1495 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1496 "udiv", fixed_arith_modes[i].name, 3);
1497 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1498 "ssdiv", fixed_arith_modes[i].name, 3);
1499 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1500 "usdiv", fixed_arith_modes[i].name, 3);
1501 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1502 "neg", fixed_arith_modes[i].name, 2);
1503 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1504 "ssneg", fixed_arith_modes[i].name, 2);
1505 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1506 "usneg", fixed_arith_modes[i].name, 2);
1507 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1508 "ashl", fixed_arith_modes[i].name, 3);
1509 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1510 "ashr", fixed_arith_modes[i].name, 3);
1511 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1512 "lshr", fixed_arith_modes[i].name, 3);
1513 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1514 "ssashl", fixed_arith_modes[i].name, 3);
1515 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1516 "usashl", fixed_arith_modes[i].name, 3);
1517 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1518 "cmp", fixed_arith_modes[i].name, 2);
1521 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1522 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1524 if (i == j
1525 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1526 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1527 continue;
1529 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1530 fixed_conv_modes[j].mode, "fract",
1531 fixed_conv_modes[i].name,
1532 fixed_conv_modes[j].name);
1533 arm_set_fixed_conv_libfunc (satfract_optab,
1534 fixed_conv_modes[i].mode,
1535 fixed_conv_modes[j].mode, "satfract",
1536 fixed_conv_modes[i].name,
1537 fixed_conv_modes[j].name);
1538 arm_set_fixed_conv_libfunc (fractuns_optab,
1539 fixed_conv_modes[i].mode,
1540 fixed_conv_modes[j].mode, "fractuns",
1541 fixed_conv_modes[i].name,
1542 fixed_conv_modes[j].name);
1543 arm_set_fixed_conv_libfunc (satfractuns_optab,
1544 fixed_conv_modes[i].mode,
1545 fixed_conv_modes[j].mode, "satfractuns",
1546 fixed_conv_modes[i].name,
1547 fixed_conv_modes[j].name);
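/* Illustrative note (a sketch, not part of the original source): the two
   loops above register helper names built from the operation name, the
   mode suffixes and the operand count, so a saturating QQmode addition
   should resolve to a libgcc call spelled along the lines of
   __gnu_ssaddqq3, and a QQmode-to-SFmode conversion to something like
   __gnu_fractqqsf.  The exact spellings are produced by
   arm_set_fixed_optab_libfunc and arm_set_fixed_conv_libfunc, defined
   earlier in this file.  */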
1551 if (TARGET_AAPCS_BASED)
1552 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1555 /* On AAPCS systems, this is the "struct __va_list". */
1556 static GTY(()) tree va_list_type;
1558 /* Return the type to use as __builtin_va_list. */
1559 static tree
1560 arm_build_builtin_va_list (void)
1562 tree va_list_name;
1563 tree ap_field;
1565 if (!TARGET_AAPCS_BASED)
1566 return std_build_builtin_va_list ();
1568 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1569 defined as:
1571 struct __va_list
1573 void *__ap;
1576 The C Library ABI further reinforces this definition in \S
1577 4.1.
1579 We must follow this definition exactly. The structure tag
1580 name is visible in C++ mangled names, and thus forms a part
1581 of the ABI. The field name may be used by people who
1582 #include <stdarg.h>. */
1583 /* Create the type. */
1584 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1585 /* Give it the required name. */
1586 va_list_name = build_decl (BUILTINS_LOCATION,
1587 TYPE_DECL,
1588 get_identifier ("__va_list"),
1589 va_list_type);
1590 DECL_ARTIFICIAL (va_list_name) = 1;
1591 TYPE_NAME (va_list_type) = va_list_name;
1592 TYPE_STUB_DECL (va_list_type) = va_list_name;
1593 /* Create the __ap field. */
1594 ap_field = build_decl (BUILTINS_LOCATION,
1595 FIELD_DECL,
1596 get_identifier ("__ap"),
1597 ptr_type_node);
1598 DECL_ARTIFICIAL (ap_field) = 1;
1599 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1600 TYPE_FIELDS (va_list_type) = ap_field;
1601 /* Compute its layout. */
1602 layout_type (va_list_type);
1604 return va_list_type;
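/* For illustration only (a sketch, not from the original source): on an
   AAPCS target the type built above is equivalent to declaring

       struct __va_list { void *__ap; };

   with __builtin_va_list being that structure type, which is why both the
   tag name and the field name must match the ABI exactly.  */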
1607 /* Return an expression of type "void *" pointing to the next
1608 available argument in a variable-argument list. VALIST is the
1609 user-level va_list object, of type __builtin_va_list. */
1610 static tree
1611 arm_extract_valist_ptr (tree valist)
1613 if (TREE_TYPE (valist) == error_mark_node)
1614 return error_mark_node;
1616 /* On an AAPCS target, the pointer is stored within "struct
1617 va_list". */
1618 if (TARGET_AAPCS_BASED)
1620 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1621 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1622 valist, ap_field, NULL_TREE);
1625 return valist;
1628 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1629 static void
1630 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1632 valist = arm_extract_valist_ptr (valist);
1633 std_expand_builtin_va_start (valist, nextarg);
1636 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1637 static tree
1638 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1639 gimple_seq *post_p)
1641 valist = arm_extract_valist_ptr (valist);
1642 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1645 /* Fix up any incompatible options that the user has specified. */
1646 static void
1647 arm_option_override (void)
1649 if (global_options_set.x_arm_arch_option)
1650 arm_selected_arch = &all_architectures[arm_arch_option];
1652 if (global_options_set.x_arm_cpu_option)
1653 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1655 if (global_options_set.x_arm_tune_option)
1656 arm_selected_tune = &all_cores[(int) arm_tune_option];
1658 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1659 SUBTARGET_OVERRIDE_OPTIONS;
1660 #endif
1662 if (arm_selected_arch)
1664 if (arm_selected_cpu)
1666 /* Check for conflict between mcpu and march. */
1667 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1669 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1670 arm_selected_cpu->name, arm_selected_arch->name);
1671 /* -march wins for code generation.
1672 -mcpu wins for default tuning. */
1673 if (!arm_selected_tune)
1674 arm_selected_tune = arm_selected_cpu;
1676 arm_selected_cpu = arm_selected_arch;
1678 else
1679 /* -mcpu wins. */
1680 arm_selected_arch = NULL;
1682 else
1683 /* Pick a CPU based on the architecture. */
1684 arm_selected_cpu = arm_selected_arch;
1687 /* If the user did not specify a processor, choose one for them. */
1688 if (!arm_selected_cpu)
1690 const struct processors * sel;
1691 unsigned int sought;
1693 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1694 if (!arm_selected_cpu->name)
1696 #ifdef SUBTARGET_CPU_DEFAULT
1697 /* Use the subtarget default CPU if none was specified by
1698 configure. */
1699 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1700 #endif
1701 /* Default to ARM6. */
1702 if (!arm_selected_cpu->name)
1703 arm_selected_cpu = &all_cores[arm6];
1706 sel = arm_selected_cpu;
1707 insn_flags = sel->flags;
1709 /* Now check to see if the user has specified some command line
1710 switch that requires certain abilities from the CPU. */
1711 sought = 0;
1713 if (TARGET_INTERWORK || TARGET_THUMB)
1715 sought |= (FL_THUMB | FL_MODE32);
1717 /* There are no ARM processors that support both APCS-26 and
1718 interworking. Therefore we force FL_MODE26 to be removed
1719 from insn_flags here (if it was set), so that the search
1720 below will always be able to find a compatible processor. */
1721 insn_flags &= ~FL_MODE26;
1724 if (sought != 0 && ((sought & insn_flags) != sought))
1726 /* Try to locate a CPU type that supports all of the abilities
1727 of the default CPU, plus the extra abilities requested by
1728 the user. */
1729 for (sel = all_cores; sel->name != NULL; sel++)
1730 if ((sel->flags & sought) == (sought | insn_flags))
1731 break;
1733 if (sel->name == NULL)
1735 unsigned current_bit_count = 0;
1736 const struct processors * best_fit = NULL;
1738 /* Ideally we would like to issue an error message here
1739 saying that it was not possible to find a CPU compatible
1740 with the default CPU, but which also supports the command
1741 line options specified by the programmer, and so they
1742 ought to use the -mcpu=<name> command line option to
1743 override the default CPU type.
1745 If we cannot find a CPU that has both the
1746 characteristics of the default CPU and the given
1747 command line options, we scan the array again looking
1748 for a best match. */
1749 for (sel = all_cores; sel->name != NULL; sel++)
1750 if ((sel->flags & sought) == sought)
1752 unsigned count;
1754 count = bit_count (sel->flags & insn_flags);
1756 if (count >= current_bit_count)
1758 best_fit = sel;
1759 current_bit_count = count;
1763 gcc_assert (best_fit);
1764 sel = best_fit;
1767 arm_selected_cpu = sel;
1771 gcc_assert (arm_selected_cpu);
1772 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1773 if (!arm_selected_tune)
1774 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1776 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1777 insn_flags = arm_selected_cpu->flags;
1778 arm_base_arch = arm_selected_cpu->base_arch;
1780 arm_tune = arm_selected_tune->core;
1781 tune_flags = arm_selected_tune->flags;
1782 current_tune = arm_selected_tune->tune;
1784 /* Make sure that the processor choice does not conflict with any of the
1785 other command line choices. */
1786 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1787 error ("target CPU does not support ARM mode");
1789 /* BPABI targets use linker tricks to allow interworking on cores
1790 without thumb support. */
1791 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1793 warning (0, "target CPU does not support interworking");
1794 target_flags &= ~MASK_INTERWORK;
1797 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1799 warning (0, "target CPU does not support THUMB instructions");
1800 target_flags &= ~MASK_THUMB;
1803 if (TARGET_APCS_FRAME && TARGET_THUMB)
1805 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1806 target_flags &= ~MASK_APCS_FRAME;
1809 /* Callee super interworking implies thumb interworking. Adding
1810 this to the flags here simplifies the logic elsewhere. */
1811 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1812 target_flags |= MASK_INTERWORK;
1814 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1815 from here where no function is being compiled currently. */
1816 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1817 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1819 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1820 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1822 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1824 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1825 target_flags |= MASK_APCS_FRAME;
1828 if (TARGET_POKE_FUNCTION_NAME)
1829 target_flags |= MASK_APCS_FRAME;
1831 if (TARGET_APCS_REENT && flag_pic)
1832 error ("-fpic and -mapcs-reent are incompatible");
1834 if (TARGET_APCS_REENT)
1835 warning (0, "APCS reentrant code not supported. Ignored");
1837 /* If this target is normally configured to use APCS frames, warn if they
1838 are turned off and debugging is turned on. */
1839 if (TARGET_ARM
1840 && write_symbols != NO_DEBUG
1841 && !TARGET_APCS_FRAME
1842 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1843 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1845 if (TARGET_APCS_FLOAT)
1846 warning (0, "passing floating point arguments in fp regs not yet supported");
1848 if (TARGET_LITTLE_WORDS)
1849 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1850 "will be removed in a future release");
1852 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1853 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1854 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1855 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1856 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1857 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1858 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1859 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1860 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1861 arm_arch6m = arm_arch6 && !arm_arch_notm;
1862 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1863 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1864 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1865 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1866 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1868 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1869 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1870 thumb_code = TARGET_ARM == 0;
1871 thumb1_code = TARGET_THUMB1 != 0;
1872 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1873 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1874 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1875 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1876 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1877 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1878 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1879 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
1880 if (arm_restrict_it == 2)
1881 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
1883 if (!TARGET_THUMB2)
1884 arm_restrict_it = 0;
1886 /* If we are not using the default (ARM mode) section anchor offset
1887 ranges, then set the correct ranges now. */
1888 if (TARGET_THUMB1)
1890 /* Thumb-1 LDR instructions cannot have negative offsets.
1891 Permissible positive offset ranges are 5-bit (for byte loads),
1892 6-bit (for halfword loads), or 7-bit (for word loads).
1893 Empirical results suggest a 7-bit anchor range gives the best
1894 overall code size. */
1895 targetm.min_anchor_offset = 0;
1896 targetm.max_anchor_offset = 127;
1898 else if (TARGET_THUMB2)
1900 /* The minimum is set such that the total size of the block
1901 for a particular anchor is 248 + 1 + 4095 bytes, which is
1902 divisible by eight, ensuring natural spacing of anchors. */
1903 targetm.min_anchor_offset = -248;
1904 targetm.max_anchor_offset = 4095;
1907 /* V5 code we generate is completely interworking capable, so we turn off
1908 TARGET_INTERWORK here to avoid many tests later on. */
1910 /* XXX However, we must pass the right pre-processor defines to CPP
1911 or GLD can get confused. This is a hack. */
1912 if (TARGET_INTERWORK)
1913 arm_cpp_interwork = 1;
1915 if (arm_arch5)
1916 target_flags &= ~MASK_INTERWORK;
1918 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1919 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1921 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1922 error ("iwmmxt abi requires an iwmmxt capable cpu");
1924 if (!global_options_set.x_arm_fpu_index)
1926 const char *target_fpu_name;
1927 bool ok;
1929 #ifdef FPUTYPE_DEFAULT
1930 target_fpu_name = FPUTYPE_DEFAULT;
1931 #else
1932 target_fpu_name = "vfp";
1933 #endif
1935 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1936 CL_TARGET);
1937 gcc_assert (ok);
1940 arm_fpu_desc = &all_fpus[arm_fpu_index];
1942 switch (arm_fpu_desc->model)
1944 case ARM_FP_MODEL_VFP:
1945 arm_fpu_attr = FPU_VFP;
1946 break;
1948 default:
1949 gcc_unreachable();
1952 if (TARGET_AAPCS_BASED)
1954 if (TARGET_CALLER_INTERWORKING)
1955 error ("AAPCS does not support -mcaller-super-interworking");
1956 else
1957 if (TARGET_CALLEE_INTERWORKING)
1958 error ("AAPCS does not support -mcallee-super-interworking");
1961 /* iWMMXt and NEON are incompatible. */
1962 if (TARGET_IWMMXT && TARGET_NEON)
1963 error ("iWMMXt and NEON are incompatible");
1965 /* iWMMXt unsupported under Thumb mode. */
1966 if (TARGET_THUMB && TARGET_IWMMXT)
1967 error ("iWMMXt unsupported under Thumb mode");
1969 /* __fp16 support currently assumes the core has ldrh. */
1970 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1971 sorry ("__fp16 and no ldrh");
1973 /* If soft-float is specified then don't use FPU. */
1974 if (TARGET_SOFT_FLOAT)
1975 arm_fpu_attr = FPU_NONE;
1977 if (TARGET_AAPCS_BASED)
1979 if (arm_abi == ARM_ABI_IWMMXT)
1980 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1981 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1982 && TARGET_HARD_FLOAT
1983 && TARGET_VFP)
1984 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1985 else
1986 arm_pcs_default = ARM_PCS_AAPCS;
1988 else
1990 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1991 sorry ("-mfloat-abi=hard and VFP");
1993 if (arm_abi == ARM_ABI_APCS)
1994 arm_pcs_default = ARM_PCS_APCS;
1995 else
1996 arm_pcs_default = ARM_PCS_ATPCS;
1999 /* For arm2/3 there is no need to do any scheduling if we are doing
2000 software floating-point. */
2001 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2002 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2004 /* Use the cp15 method if it is available. */
2005 if (target_thread_pointer == TP_AUTO)
2007 if (arm_arch6k && !TARGET_THUMB1)
2008 target_thread_pointer = TP_CP15;
2009 else
2010 target_thread_pointer = TP_SOFT;
2013 if (TARGET_HARD_TP && TARGET_THUMB1)
2014 error ("can not use -mtp=cp15 with 16-bit Thumb");
2016 /* Override the default structure alignment for AAPCS ABI. */
2017 if (!global_options_set.x_arm_structure_size_boundary)
2019 if (TARGET_AAPCS_BASED)
2020 arm_structure_size_boundary = 8;
2022 else
2024 if (arm_structure_size_boundary != 8
2025 && arm_structure_size_boundary != 32
2026 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2028 if (ARM_DOUBLEWORD_ALIGN)
2029 warning (0,
2030 "structure size boundary can only be set to 8, 32 or 64");
2031 else
2032 warning (0, "structure size boundary can only be set to 8 or 32");
2033 arm_structure_size_boundary
2034 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2038 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2040 error ("RTP PIC is incompatible with Thumb");
2041 flag_pic = 0;
2044 /* If stack checking is disabled, we can use r10 as the PIC register,
2045 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2046 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2048 if (TARGET_VXWORKS_RTP)
2049 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2050 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2053 if (flag_pic && TARGET_VXWORKS_RTP)
2054 arm_pic_register = 9;
2056 if (arm_pic_register_string != NULL)
2058 int pic_register = decode_reg_name (arm_pic_register_string);
2060 if (!flag_pic)
2061 warning (0, "-mpic-register= is useless without -fpic");
2063 /* Prevent the user from choosing an obviously stupid PIC register. */
2064 else if (pic_register < 0 || call_used_regs[pic_register]
2065 || pic_register == HARD_FRAME_POINTER_REGNUM
2066 || pic_register == STACK_POINTER_REGNUM
2067 || pic_register >= PC_REGNUM
2068 || (TARGET_VXWORKS_RTP
2069 && (unsigned int) pic_register != arm_pic_register))
2070 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2071 else
2072 arm_pic_register = pic_register;
2075 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2076 if (fix_cm3_ldrd == 2)
2078 if (arm_selected_cpu->core == cortexm3)
2079 fix_cm3_ldrd = 1;
2080 else
2081 fix_cm3_ldrd = 0;
2084 /* Enable -munaligned-access by default for
2085 - all ARMv6 architecture-based processors
2086 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2087 - ARMv8 architecture-based processors.
2089 Disable -munaligned-access by default for
2090 - all pre-ARMv6 architecture-based processors
2091 - ARMv6-M architecture-based processors. */
2093 if (unaligned_access == 2)
2095 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2096 unaligned_access = 1;
2097 else
2098 unaligned_access = 0;
2100 else if (unaligned_access == 1
2101 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2103 warning (0, "target CPU does not support unaligned accesses");
2104 unaligned_access = 0;
2107 if (TARGET_THUMB1 && flag_schedule_insns)
2109 /* Don't warn since it's on by default in -O2. */
2110 flag_schedule_insns = 0;
2113 if (optimize_size)
2115 /* If optimizing for size, bump the number of instructions that we
2116 are prepared to conditionally execute (even on a StrongARM). */
2117 max_insns_skipped = 6;
2119 else
2120 max_insns_skipped = current_tune->max_insns_skipped;
2122 /* Hot/Cold partitioning is not currently supported, since we can't
2123 handle literal pool placement in that case. */
2124 if (flag_reorder_blocks_and_partition)
2126 inform (input_location,
2127 "-freorder-blocks-and-partition not supported on this architecture");
2128 flag_reorder_blocks_and_partition = 0;
2129 flag_reorder_blocks = 1;
2132 if (flag_pic)
2133 /* Hoisting PIC address calculations more aggressively provides a small,
2134 but measurable, size reduction for PIC code. Therefore, we decrease
2135 the bar for unrestricted expression hoisting to the cost of PIC address
2136 calculation, which is 2 instructions. */
2137 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2138 global_options.x_param_values,
2139 global_options_set.x_param_values);
2141 /* ARM EABI defaults to strict volatile bitfields. */
2142 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2143 && abi_version_at_least(2))
2144 flag_strict_volatile_bitfields = 1;
2146 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2147 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2148 if (flag_prefetch_loop_arrays < 0
2149 && HAVE_prefetch
2150 && optimize >= 3
2151 && current_tune->num_prefetch_slots > 0)
2152 flag_prefetch_loop_arrays = 1;
2154 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2155 defaults unless we are tuning for a core we have researched values for. */
2156 if (current_tune->num_prefetch_slots > 0)
2157 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2158 current_tune->num_prefetch_slots,
2159 global_options.x_param_values,
2160 global_options_set.x_param_values);
2161 if (current_tune->l1_cache_line_size >= 0)
2162 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2163 current_tune->l1_cache_line_size,
2164 global_options.x_param_values,
2165 global_options_set.x_param_values);
2166 if (current_tune->l1_cache_size >= 0)
2167 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2168 current_tune->l1_cache_size,
2169 global_options.x_param_values,
2170 global_options_set.x_param_values);
2172 /* Use NEON rather than core registers to perform 64-bit
2173 operations. */
2174 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2175 if (use_neon_for_64bits == 1)
2176 prefer_neon_for_64bits = true;
2178 /* Use the alternative scheduling-pressure algorithm by default. */
2179 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2180 global_options.x_param_values,
2181 global_options_set.x_param_values);
2183 /* Disable shrink-wrap when optimizing function for size, since it tends to
2184 generate additional returns. */
2185 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2186 flag_shrink_wrap = false;
2187 /* TBD: Dwarf info for apcs frame is not handled yet. */
2188 if (TARGET_APCS_FRAME)
2189 flag_shrink_wrap = false;
2191 /* Register global variables with the garbage collector. */
2192 arm_add_gc_roots ();
2195 static void
2196 arm_add_gc_roots (void)
2198 gcc_obstack_init(&minipool_obstack);
2199 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2202 /* A table of known ARM exception types.
2203 For use with the interrupt function attribute. */
2205 typedef struct
2207 const char *const arg;
2208 const unsigned long return_value;
2210 isr_attribute_arg;
2212 static const isr_attribute_arg isr_attribute_args [] =
2214 { "IRQ", ARM_FT_ISR },
2215 { "irq", ARM_FT_ISR },
2216 { "FIQ", ARM_FT_FIQ },
2217 { "fiq", ARM_FT_FIQ },
2218 { "ABORT", ARM_FT_ISR },
2219 { "abort", ARM_FT_ISR },
2220 { "ABORT", ARM_FT_ISR },
2221 { "abort", ARM_FT_ISR },
2222 { "UNDEF", ARM_FT_EXCEPTION },
2223 { "undef", ARM_FT_EXCEPTION },
2224 { "SWI", ARM_FT_EXCEPTION },
2225 { "swi", ARM_FT_EXCEPTION },
2226 { NULL, ARM_FT_NORMAL }
2229 /* Returns the (interrupt) function type of the current
2230 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2232 static unsigned long
2233 arm_isr_value (tree argument)
2235 const isr_attribute_arg * ptr;
2236 const char * arg;
2238 if (!arm_arch_notm)
2239 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2241 /* No argument - default to IRQ. */
2242 if (argument == NULL_TREE)
2243 return ARM_FT_ISR;
2245 /* Get the value of the argument. */
2246 if (TREE_VALUE (argument) == NULL_TREE
2247 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2248 return ARM_FT_UNKNOWN;
2250 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2252 /* Check it against the list of known arguments. */
2253 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2254 if (streq (arg, ptr->arg))
2255 return ptr->return_value;
2257 /* An unrecognized interrupt type. */
2258 return ARM_FT_UNKNOWN;
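/* Usage sketch (not part of the original source; the handler names are
   hypothetical): the strings in isr_attribute_args above are what user
   code passes to the attribute, e.g.

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
       void fiq_handler  (void) __attribute__ ((interrupt ("FIQ")));

   With no argument the function is treated as an IRQ handler; an
   unrecognized string makes arm_isr_value return ARM_FT_UNKNOWN.  */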
2261 /* Computes the type of the current function. */
2263 static unsigned long
2264 arm_compute_func_type (void)
2266 unsigned long type = ARM_FT_UNKNOWN;
2267 tree a;
2268 tree attr;
2270 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2272 /* Decide if the current function is volatile. Such functions
2273 never return, and many memory cycles can be saved by not storing
2274 register values that will never be needed again. This optimization
2275 was added to speed up context switching in a kernel application. */
2276 if (optimize > 0
2277 && (TREE_NOTHROW (current_function_decl)
2278 || !(flag_unwind_tables
2279 || (flag_exceptions
2280 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2281 && TREE_THIS_VOLATILE (current_function_decl))
2282 type |= ARM_FT_VOLATILE;
2284 if (cfun->static_chain_decl != NULL)
2285 type |= ARM_FT_NESTED;
2287 attr = DECL_ATTRIBUTES (current_function_decl);
2289 a = lookup_attribute ("naked", attr);
2290 if (a != NULL_TREE)
2291 type |= ARM_FT_NAKED;
2293 a = lookup_attribute ("isr", attr);
2294 if (a == NULL_TREE)
2295 a = lookup_attribute ("interrupt", attr);
2297 if (a == NULL_TREE)
2298 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2299 else
2300 type |= arm_isr_value (TREE_VALUE (a));
2302 return type;
2305 /* Returns the type of the current function. */
2307 unsigned long
2308 arm_current_func_type (void)
2310 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2311 cfun->machine->func_type = arm_compute_func_type ();
2313 return cfun->machine->func_type;
2316 bool
2317 arm_allocate_stack_slots_for_args (void)
2319 /* Naked functions should not allocate stack slots for arguments. */
2320 return !IS_NAKED (arm_current_func_type ());
2323 static bool
2324 arm_warn_func_return (tree decl)
2326 /* Naked functions are implemented entirely in assembly, including the
2327 return sequence, so suppress warnings about this. */
2328 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
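/* Illustrative example (not from the original source; the function name
   is hypothetical): a naked function supplies its own prologue, epilogue
   and return sequence in inline assembly, so warnings about an apparently
   missing return are suppressed for it:

       void __attribute__ ((naked))
       reset_handler (void)
       {
         __asm__ volatile ("b   main");
       }
   */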
2332 /* Output assembler code for a block containing the constant parts
2333 of a trampoline, leaving space for the variable parts.
2335 On the ARM, (if r8 is the static chain regnum, and remembering that
2336 referencing pc adds an offset of 8) the trampoline looks like:
2337 ldr r8, [pc, #0]
2338 ldr pc, [pc]
2339 .word static chain value
2340 .word function's address
2341 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2343 static void
2344 arm_asm_trampoline_template (FILE *f)
2346 if (TARGET_ARM)
2348 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2349 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2351 else if (TARGET_THUMB2)
2353 /* The Thumb-2 trampoline is similar to the arm implementation.
2354 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2355 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2356 STATIC_CHAIN_REGNUM, PC_REGNUM);
2357 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2359 else
2361 ASM_OUTPUT_ALIGN (f, 2);
2362 fprintf (f, "\t.code\t16\n");
2363 fprintf (f, ".Ltrampoline_start:\n");
2364 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2365 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2366 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2367 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2368 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2369 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2371 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2372 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2375 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2377 static void
2378 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2380 rtx fnaddr, mem, a_tramp;
2382 emit_block_move (m_tramp, assemble_trampoline_template (),
2383 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2385 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2386 emit_move_insn (mem, chain_value);
2388 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2389 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2390 emit_move_insn (mem, fnaddr);
2392 a_tramp = XEXP (m_tramp, 0);
2393 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2394 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2395 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2398 /* Thumb trampolines should be entered in thumb mode, so set
2399 the bottom bit of the address. */
2401 static rtx
2402 arm_trampoline_adjust_address (rtx addr)
2404 if (TARGET_THUMB)
2405 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2406 NULL, 0, OPTAB_LIB_WIDEN);
2407 return addr;
2410 /* Return 1 if it is possible to return using a single instruction.
2411 If SIBLING is non-null, this is a test for a return before a sibling
2412 call. SIBLING is the call insn, so we can examine its register usage. */
2415 use_return_insn (int iscond, rtx sibling)
2417 int regno;
2418 unsigned int func_type;
2419 unsigned long saved_int_regs;
2420 unsigned HOST_WIDE_INT stack_adjust;
2421 arm_stack_offsets *offsets;
2423 /* Never use a return instruction before reload has run. */
2424 if (!reload_completed)
2425 return 0;
2427 func_type = arm_current_func_type ();
2429 /* Naked, volatile and stack alignment functions need special
2430 consideration. */
2431 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2432 return 0;
2434 /* So do interrupt functions that use the frame pointer and Thumb
2435 interrupt functions. */
2436 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2437 return 0;
2439 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2440 && !optimize_function_for_size_p (cfun))
2441 return 0;
2443 offsets = arm_get_frame_offsets ();
2444 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2446 /* As do variadic functions. */
2447 if (crtl->args.pretend_args_size
2448 || cfun->machine->uses_anonymous_args
2449 /* Or if the function calls __builtin_eh_return () */
2450 || crtl->calls_eh_return
2451 /* Or if the function calls alloca */
2452 || cfun->calls_alloca
2453 /* Or if there is a stack adjustment. However, if the stack pointer
2454 is saved on the stack, we can use a pre-incrementing stack load. */
2455 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2456 && stack_adjust == 4)))
2457 return 0;
2459 saved_int_regs = offsets->saved_regs_mask;
2461 /* Unfortunately, the insn
2463 ldmib sp, {..., sp, ...}
2465 triggers a bug on most SA-110 based devices, such that the stack
2466 pointer won't be correctly restored if the instruction takes a
2467 page fault. We work around this problem by popping r3 along with
2468 the other registers, since that is never slower than executing
2469 another instruction.
2471 We test for !arm_arch5 here, because code for any architecture
2472 less than this could potentially be run on one of the buggy
2473 chips. */
2474 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2476 /* Validate that r3 is a call-clobbered register (always true in
2477 the default abi) ... */
2478 if (!call_used_regs[3])
2479 return 0;
2481 /* ... that it isn't being used for a return value ... */
2482 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2483 return 0;
2485 /* ... or for a tail-call argument ... */
2486 if (sibling)
2488 gcc_assert (CALL_P (sibling));
2490 if (find_regno_fusage (sibling, USE, 3))
2491 return 0;
2494 /* ... and that there are no call-saved registers in r0-r2
2495 (always true in the default ABI). */
2496 if (saved_int_regs & 0x7)
2497 return 0;
2500 /* Can't be done if interworking with Thumb, and any registers have been
2501 stacked. */
2502 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2503 return 0;
2505 /* On StrongARM, conditional returns are expensive if they aren't
2506 taken and multiple registers have been stacked. */
2507 if (iscond && arm_tune_strongarm)
2509 /* Conditional return when just the LR is stored is a simple
2510 conditional-load instruction, that's not expensive. */
2511 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2512 return 0;
2514 if (flag_pic
2515 && arm_pic_register != INVALID_REGNUM
2516 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2517 return 0;
2520 /* If there are saved registers but the LR isn't saved, then we need
2521 two instructions for the return. */
2522 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2523 return 0;
2525 /* Can't be done if any of the VFP regs are pushed,
2526 since this also requires an insn. */
2527 if (TARGET_HARD_FLOAT && TARGET_VFP)
2528 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2529 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2530 return 0;
2532 if (TARGET_REALLY_IWMMXT)
2533 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2534 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2535 return 0;
2537 return 1;
2540 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2541 shrink-wrapping if possible. This is the case if we need to emit a
2542 prologue, which we can test by looking at the offsets. */
2543 bool
2544 use_simple_return_p (void)
2546 arm_stack_offsets *offsets;
2548 offsets = arm_get_frame_offsets ();
2549 return offsets->outgoing_args != 0;
2552 /* Return TRUE if int I is a valid immediate ARM constant. */
2555 const_ok_for_arm (HOST_WIDE_INT i)
2557 int lowbit;
2559 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2560 be all zero, or all one. */
2561 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2562 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2563 != ((~(unsigned HOST_WIDE_INT) 0)
2564 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2565 return FALSE;
2567 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2569 /* Fast return for 0 and small values. We must do this for zero, since
2570 the code below can't handle that one case. */
2571 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2572 return TRUE;
2574 /* Get the number of trailing zeros. */
2575 lowbit = ffs((int) i) - 1;
2577 /* Only even shifts are allowed in ARM mode so round down to the
2578 nearest even number. */
2579 if (TARGET_ARM)
2580 lowbit &= ~1;
2582 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2583 return TRUE;
2585 if (TARGET_ARM)
2587 /* Allow rotated constants in ARM mode. */
2588 if (lowbit <= 4
2589 && ((i & ~0xc000003f) == 0
2590 || (i & ~0xf000000f) == 0
2591 || (i & ~0xfc000003) == 0))
2592 return TRUE;
2594 else
2596 HOST_WIDE_INT v;
2598 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2599 v = i & 0xff;
2600 v |= v << 16;
2601 if (i == v || i == (v | (v << 8)))
2602 return TRUE;
2604 /* Allow repeated pattern 0xXY00XY00. */
2605 v = i & 0xff00;
2606 v |= v << 16;
2607 if (i == v)
2608 return TRUE;
2611 return FALSE;
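/* Worked examples for const_ok_for_arm (a sketch, not part of the original
   source), judged in ARM mode:

     0x000000ff   valid    - fits in 8 bits, no rotation needed
     0xff000000   valid    - 0xff rotated right by 8 (an even rotation)
     0x0003fc00   valid    - 0xff shifted left by 10
     0x00000101   invalid  - the set bits span more than 8 positions
     0x0000ffff   invalid  - more than 8 significant bits

   In Thumb-2 the replicated patterns 0x00XY00XY, 0xXY00XY00 and
   0xXYXYXYXY are additionally accepted, as handled above.  */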
2614 /* Return true if I is a valid constant for the operation CODE. */
2616 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2618 if (const_ok_for_arm (i))
2619 return 1;
2621 switch (code)
2623 case SET:
2624 /* See if we can use movw. */
2625 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2626 return 1;
2627 else
2628 /* Otherwise, try mvn. */
2629 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2631 case PLUS:
2632 /* See if we can use addw or subw. */
2633 if (TARGET_THUMB2
2634 && ((i & 0xfffff000) == 0
2635 || ((-i) & 0xfffff000) == 0))
2636 return 1;
2637 /* else fall through. */
2639 case COMPARE:
2640 case EQ:
2641 case NE:
2642 case GT:
2643 case LE:
2644 case LT:
2645 case GE:
2646 case GEU:
2647 case LTU:
2648 case GTU:
2649 case LEU:
2650 case UNORDERED:
2651 case ORDERED:
2652 case UNEQ:
2653 case UNGE:
2654 case UNLT:
2655 case UNGT:
2656 case UNLE:
2657 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2659 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2660 case XOR:
2661 return 0;
2663 case IOR:
2664 if (TARGET_THUMB2)
2665 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2666 return 0;
2668 case AND:
2669 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2671 default:
2672 gcc_unreachable ();
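/* Illustrative cases for const_ok_for_op (a sketch, not part of the
   original source), assuming a Thumb-2 capable core:

     code = SET,  i = 0x00002345   - accepted: high half is zero, usable with movw
     code = PLUS, i = 0x00000fff   - accepted: 12-bit immediate, usable with addw
     code = PLUS, i = -0x00000fff  - accepted: 12-bit immediate, usable with subw
     code = AND,  i = 0xffffff00   - accepted: ~i is 0xff, usable with bic  */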
2676 /* Return true if I is a valid DImode constant for the operation CODE. */
2678 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2680 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2681 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2682 rtx hi = GEN_INT (hi_val);
2683 rtx lo = GEN_INT (lo_val);
2685 if (TARGET_THUMB1)
2686 return 0;
2688 switch (code)
2690 case AND:
2691 case IOR:
2692 case XOR:
2693 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
2694 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
2695 case PLUS:
2696 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2698 default:
2699 return 0;
2703 /* Emit a sequence of insns to handle a large constant.
2704 CODE is the code of the operation required; it can be any of SET, PLUS,
2705 IOR, AND, XOR, MINUS;
2706 MODE is the mode in which the operation is being performed;
2707 VAL is the integer to operate on;
2708 SOURCE is the other operand (a register, or a null-pointer for SET);
2709 SUBTARGETS means it is safe to create scratch registers if that will
2710 either produce a simpler sequence, or if we will want to CSE the values.
2711 Return value is the number of insns emitted. */
2713 /* ??? Tweak this for thumb2. */
2715 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2716 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2718 rtx cond;
2720 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2721 cond = COND_EXEC_TEST (PATTERN (insn));
2722 else
2723 cond = NULL_RTX;
2725 if (subtargets || code == SET
2726 || (REG_P (target) && REG_P (source)
2727 && REGNO (target) != REGNO (source)))
2729 /* After arm_reorg has been called, we can't fix up expensive
2730 constants by pushing them into memory so we must synthesize
2731 them in-line, regardless of the cost. This is only likely to
2732 be more costly on chips that have load delay slots and we are
2733 compiling without running the scheduler (so no splitting
2734 occurred before the final instruction emission).
2736 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2738 if (!after_arm_reorg
2739 && !cond
2740 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2741 1, 0)
2742 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2743 + (code != SET))))
2745 if (code == SET)
2747 /* Currently SET is the only monadic value for CODE; all
2748 the rest are dyadic. */
2749 if (TARGET_USE_MOVT)
2750 arm_emit_movpair (target, GEN_INT (val));
2751 else
2752 emit_set_insn (target, GEN_INT (val));
2754 return 1;
2756 else
2758 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2760 if (TARGET_USE_MOVT)
2761 arm_emit_movpair (temp, GEN_INT (val));
2762 else
2763 emit_set_insn (temp, GEN_INT (val));
2765 /* For MINUS, the constant is the value we subtract from (i.e. we
2766 compute VAL minus SOURCE), since we never have subtraction of a constant. */
2767 if (code == MINUS)
2768 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2769 else
2770 emit_set_insn (target,
2771 gen_rtx_fmt_ee (code, mode, source, temp));
2772 return 2;
2777 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
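/* Worked example (a sketch, not part of the original source): setting a
   register to 0xff0000ff cannot be done with a single ARM-mode immediate,
   but it splits into two valid rotated 8-bit constants, so the code above
   ends up emitting a sequence equivalent to

       mov     rD, #0xff000000    @ 0xff rotated right by 8
       orr     rD, rD, #0x000000ff

   (on cores where TARGET_USE_MOVT holds, a movw/movt pair may be used
   for SET instead).  */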
2781 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2782 ARM/Thumb-2 immediates and add up to VAL.
2783 The function return value gives the number of insns required. */
2784 static int
2785 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2786 struct four_ints *return_sequence)
2788 int best_consecutive_zeros = 0;
2789 int i;
2790 int best_start = 0;
2791 int insns1, insns2;
2792 struct four_ints tmp_sequence;
2794 /* If we aren't targeting ARM, the best place to start is always at
2795 the bottom; otherwise look more closely. */
2796 if (TARGET_ARM)
2798 for (i = 0; i < 32; i += 2)
2800 int consecutive_zeros = 0;
2802 if (!(val & (3 << i)))
2804 while ((i < 32) && !(val & (3 << i)))
2806 consecutive_zeros += 2;
2807 i += 2;
2809 if (consecutive_zeros > best_consecutive_zeros)
2811 best_consecutive_zeros = consecutive_zeros;
2812 best_start = i - consecutive_zeros;
2814 i -= 2;
2819 /* So long as it won't require any more insns to do so, it's
2820 desirable to emit a small constant (in bits 0...9) in the last
2821 insn. This way there is more chance that it can be combined with
2822 a later addressing insn to form a pre-indexed load or store
2823 operation. Consider:
2825 *((volatile int *)0xe0000100) = 1;
2826 *((volatile int *)0xe0000110) = 2;
2828 We want this to wind up as:
2830 mov rA, #0xe0000000
2831 mov rB, #1
2832 str rB, [rA, #0x100]
2833 mov rB, #2
2834 str rB, [rA, #0x110]
2836 rather than having to synthesize both large constants from scratch.
2838 Therefore, we calculate how many insns would be required to emit
2839 the constant starting from `best_start', and also starting from
2840 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2841 yield a shorter sequence, we may as well use zero. */
2842 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2843 if (best_start != 0
2844 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2846 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2847 if (insns2 <= insns1)
2849 *return_sequence = tmp_sequence;
2850 insns1 = insns2;
2854 return insns1;
2857 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2858 static int
2859 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2860 struct four_ints *return_sequence, int i)
2862 int remainder = val & 0xffffffff;
2863 int insns = 0;
2865 /* Try and find a way of doing the job in either two or three
2866 instructions.
2868 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2869 location. We start at position I. This may be the MSB, or
2870 optimal_immediate_sequence may have positioned it at the largest block
2871 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2872 wrapping around to the top of the word when we drop off the bottom.
2873 In the worst case this code should produce no more than four insns.
2875 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2876 constants, shifted to any arbitrary location. We should always start
2877 at the MSB. */
2880 int end;
2881 unsigned int b1, b2, b3, b4;
2882 unsigned HOST_WIDE_INT result;
2883 int loc;
2885 gcc_assert (insns < 4);
2887 if (i <= 0)
2888 i += 32;
2890 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2891 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2893 loc = i;
2894 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2895 /* We can use addw/subw for the last 12 bits. */
2896 result = remainder;
2897 else
2899 /* Use an 8-bit shifted/rotated immediate. */
2900 end = i - 8;
2901 if (end < 0)
2902 end += 32;
2903 result = remainder & ((0x0ff << end)
2904 | ((i < end) ? (0xff >> (32 - end))
2905 : 0));
2906 i -= 8;
2909 else
2911 /* ARM allows rotates by a multiple of two; Thumb-2 allows
2912 arbitrary shifts. */
2913 i -= TARGET_ARM ? 2 : 1;
2914 continue;
2917 /* Next, see if we can do a better job with a thumb2 replicated
2918 constant.
2920 We do it this way around to catch the cases like 0x01F001E0 where
2921 two 8-bit immediates would work, but a replicated constant would
2922 make it worse.
2924 TODO: 16-bit constants that don't clear all the bits, but still win.
2925 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2926 if (TARGET_THUMB2)
2928 b1 = (remainder & 0xff000000) >> 24;
2929 b2 = (remainder & 0x00ff0000) >> 16;
2930 b3 = (remainder & 0x0000ff00) >> 8;
2931 b4 = remainder & 0xff;
2933 if (loc > 24)
2935 /* The 8-bit immediate already found clears b1 (and maybe b2),
2936 but must leave b3 and b4 alone. */
2938 /* First try to find a 32-bit replicated constant that clears
2939 almost everything. We can assume that we can't do it in one,
2940 or else we wouldn't be here. */
2941 unsigned int tmp = b1 & b2 & b3 & b4;
2942 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2943 + (tmp << 24);
2944 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2945 + (tmp == b3) + (tmp == b4);
2946 if (tmp
2947 && (matching_bytes >= 3
2948 || (matching_bytes == 2
2949 && const_ok_for_op (remainder & ~tmp2, code))))
2951 /* At least 3 of the bytes match, and the fourth has at
2952 least as many bits set, or two of the bytes match
2953 and it will only require one more insn to finish. */
2954 result = tmp2;
2955 i = tmp != b1 ? 32
2956 : tmp != b2 ? 24
2957 : tmp != b3 ? 16
2958 : 8;
2961 /* Second, try to find a 16-bit replicated constant that can
2962 leave three of the bytes clear. If b2 or b4 is already
2963 zero, then we can. If the 8-bit immediate from above would not
2964 clear b2 anyway, then we still win. */
2965 else if (b1 == b3 && (!b2 || !b4
2966 || (remainder & 0x00ff0000 & ~result)))
2968 result = remainder & 0xff00ff00;
2969 i = 24;
2972 else if (loc > 16)
2974 /* The 8-bit immediate already found clears b2 (and maybe b3)
2975 and we don't get here unless b1 is already clear, but it will
2976 leave b4 unchanged. */
2978 /* If we can clear b2 and b4 at once, then we win, since the
2979 8-bits couldn't possibly reach that far. */
2980 if (b2 == b4)
2982 result = remainder & 0x00ff00ff;
2983 i = 16;
2988 return_sequence->i[insns++] = result;
2989 remainder &= ~result;
2991 if (code == SET || code == MINUS)
2992 code = PLUS;
2994 while (remainder);
2996 return insns;
2999 /* Emit an instruction with the indicated PATTERN. If COND is
3000 non-NULL, conditionalize the execution of the instruction on COND
3001 being true. */
3003 static void
3004 emit_constant_insn (rtx cond, rtx pattern)
3006 if (cond)
3007 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3008 emit_insn (pattern);
3011 /* As above, but extra parameter GENERATE which, if clear, suppresses
3012 RTL generation. */
3014 static int
3015 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3016 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3017 int generate)
3019 int can_invert = 0;
3020 int can_negate = 0;
3021 int final_invert = 0;
3022 int i;
3023 int set_sign_bit_copies = 0;
3024 int clear_sign_bit_copies = 0;
3025 int clear_zero_bit_copies = 0;
3026 int set_zero_bit_copies = 0;
3027 int insns = 0, neg_insns, inv_insns;
3028 unsigned HOST_WIDE_INT temp1, temp2;
3029 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3030 struct four_ints *immediates;
3031 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3033 /* Find out which operations are safe for a given CODE. Also do a quick
3034 check for degenerate cases; these can occur when DImode operations
3035 are split. */
3036 switch (code)
3038 case SET:
3039 can_invert = 1;
3040 break;
3042 case PLUS:
3043 can_negate = 1;
3044 break;
3046 case IOR:
3047 if (remainder == 0xffffffff)
3049 if (generate)
3050 emit_constant_insn (cond,
3051 gen_rtx_SET (VOIDmode, target,
3052 GEN_INT (ARM_SIGN_EXTEND (val))));
3053 return 1;
3056 if (remainder == 0)
3058 if (reload_completed && rtx_equal_p (target, source))
3059 return 0;
3061 if (generate)
3062 emit_constant_insn (cond,
3063 gen_rtx_SET (VOIDmode, target, source));
3064 return 1;
3066 break;
3068 case AND:
3069 if (remainder == 0)
3071 if (generate)
3072 emit_constant_insn (cond,
3073 gen_rtx_SET (VOIDmode, target, const0_rtx));
3074 return 1;
3076 if (remainder == 0xffffffff)
3078 if (reload_completed && rtx_equal_p (target, source))
3079 return 0;
3080 if (generate)
3081 emit_constant_insn (cond,
3082 gen_rtx_SET (VOIDmode, target, source));
3083 return 1;
3085 can_invert = 1;
3086 break;
3088 case XOR:
3089 if (remainder == 0)
3091 if (reload_completed && rtx_equal_p (target, source))
3092 return 0;
3093 if (generate)
3094 emit_constant_insn (cond,
3095 gen_rtx_SET (VOIDmode, target, source));
3096 return 1;
3099 if (remainder == 0xffffffff)
3101 if (generate)
3102 emit_constant_insn (cond,
3103 gen_rtx_SET (VOIDmode, target,
3104 gen_rtx_NOT (mode, source)));
3105 return 1;
3107 final_invert = 1;
3108 break;
3110 case MINUS:
3111 /* We treat MINUS as (val - source), since (source - val) is always
3112 passed as (source + (-val)). */
3113 if (remainder == 0)
3115 if (generate)
3116 emit_constant_insn (cond,
3117 gen_rtx_SET (VOIDmode, target,
3118 gen_rtx_NEG (mode, source)));
3119 return 1;
3121 if (const_ok_for_arm (val))
3123 if (generate)
3124 emit_constant_insn (cond,
3125 gen_rtx_SET (VOIDmode, target,
3126 gen_rtx_MINUS (mode, GEN_INT (val),
3127 source)));
3128 return 1;
3131 break;
3133 default:
3134 gcc_unreachable ();
3137 /* If we can do it in one insn get out quickly. */
3138 if (const_ok_for_op (val, code))
3140 if (generate)
3141 emit_constant_insn (cond,
3142 gen_rtx_SET (VOIDmode, target,
3143 (source
3144 ? gen_rtx_fmt_ee (code, mode, source,
3145 GEN_INT (val))
3146 : GEN_INT (val))));
3147 return 1;
3150 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3151 insn. */
3152 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3153 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3155 if (generate)
3157 if (mode == SImode && i == 16)
3158 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3159 smaller insn. */
3160 emit_constant_insn (cond,
3161 gen_zero_extendhisi2
3162 (target, gen_lowpart (HImode, source)));
3163 else
3164 /* Extz only supports SImode, but we can coerce the operands
3165 into that mode. */
3166 emit_constant_insn (cond,
3167 gen_extzv_t2 (gen_lowpart (SImode, target),
3168 gen_lowpart (SImode, source),
3169 GEN_INT (i), const0_rtx));
3172 return 1;
3175 /* Calculate a few attributes that may be useful for specific
3176 optimizations. */
3177 /* Count number of leading zeros. */
3178 for (i = 31; i >= 0; i--)
3180 if ((remainder & (1 << i)) == 0)
3181 clear_sign_bit_copies++;
3182 else
3183 break;
3186 /* Count number of leading 1's. */
3187 for (i = 31; i >= 0; i--)
3189 if ((remainder & (1 << i)) != 0)
3190 set_sign_bit_copies++;
3191 else
3192 break;
3195 /* Count number of trailing zeros. */
3196 for (i = 0; i <= 31; i++)
3198 if ((remainder & (1 << i)) == 0)
3199 clear_zero_bit_copies++;
3200 else
3201 break;
3204 /* Count number of trailing 1's. */
3205 for (i = 0; i <= 31; i++)
3207 if ((remainder & (1 << i)) != 0)
3208 set_zero_bit_copies++;
3209 else
3210 break;
3213 switch (code)
3215 case SET:
3216 /* See if we can do this by sign_extending a constant that is known
3217 to be negative. This is a good way of doing it, since the shift
3218 may well merge into a subsequent insn. */
3219 if (set_sign_bit_copies > 1)
3221 if (const_ok_for_arm
3222 (temp1 = ARM_SIGN_EXTEND (remainder
3223 << (set_sign_bit_copies - 1))))
3225 if (generate)
3227 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3228 emit_constant_insn (cond,
3229 gen_rtx_SET (VOIDmode, new_src,
3230 GEN_INT (temp1)));
3231 emit_constant_insn (cond,
3232 gen_ashrsi3 (target, new_src,
3233 GEN_INT (set_sign_bit_copies - 1)));
3235 return 2;
3237 /* For an inverted constant, we will need to set the low bits;
3238 these will be shifted out of harm's way. */
3239 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3240 if (const_ok_for_arm (~temp1))
3242 if (generate)
3244 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3245 emit_constant_insn (cond,
3246 gen_rtx_SET (VOIDmode, new_src,
3247 GEN_INT (temp1)));
3248 emit_constant_insn (cond,
3249 gen_ashrsi3 (target, new_src,
3250 GEN_INT (set_sign_bit_copies - 1)));
3252 return 2;
3256 /* See if we can calculate the value as the difference between two
3257 valid immediates. */
3258 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3260 int topshift = clear_sign_bit_copies & ~1;
3262 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3263 & (0xff000000 >> topshift));
3265 /* If temp1 is zero, then that means the 9 most significant
3266 bits of remainder were 1 and we've caused it to overflow.
3267 When topshift is 0 we don't need to do anything since we
3268 can borrow from 'bit 32'. */
3269 if (temp1 == 0 && topshift != 0)
3270 temp1 = 0x80000000 >> (topshift - 1);
3272 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3274 if (const_ok_for_arm (temp2))
3276 if (generate)
3278 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3279 emit_constant_insn (cond,
3280 gen_rtx_SET (VOIDmode, new_src,
3281 GEN_INT (temp1)));
3282 emit_constant_insn (cond,
3283 gen_addsi3 (target, new_src,
3284 GEN_INT (-temp2)));
3287 return 2;
3291 /* See if we can generate this by setting the bottom (or the top)
3292 16 bits, and then shifting these into the other half of the
3293 word. We only look for the simplest cases; to do more would cost
3294 too much. Be careful, however, not to generate this when the
3295 alternative would take fewer insns. */
3296 if (val & 0xffff0000)
3298 temp1 = remainder & 0xffff0000;
3299 temp2 = remainder & 0x0000ffff;
3301 /* Overlaps outside this range are best done using other methods. */
3302 for (i = 9; i < 24; i++)
3304 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3305 && !const_ok_for_arm (temp2))
3307 rtx new_src = (subtargets
3308 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3309 : target);
3310 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3311 source, subtargets, generate);
3312 source = new_src;
3313 if (generate)
3314 emit_constant_insn
3315 (cond,
3316 gen_rtx_SET
3317 (VOIDmode, target,
3318 gen_rtx_IOR (mode,
3319 gen_rtx_ASHIFT (mode, source,
3320 GEN_INT (i)),
3321 source)));
3322 return insns + 1;
3326 /* Don't duplicate cases already considered. */
3327 for (i = 17; i < 24; i++)
3329 if (((temp1 | (temp1 >> i)) == remainder)
3330 && !const_ok_for_arm (temp1))
3332 rtx new_src = (subtargets
3333 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3334 : target);
3335 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3336 source, subtargets, generate);
3337 source = new_src;
3338 if (generate)
3339 emit_constant_insn
3340 (cond,
3341 gen_rtx_SET (VOIDmode, target,
3342 gen_rtx_IOR
3343 (mode,
3344 gen_rtx_LSHIFTRT (mode, source,
3345 GEN_INT (i)),
3346 source)));
3347 return insns + 1;
3351 break;
3353 case IOR:
3354 case XOR:
3355 /* If we have IOR or XOR, and the constant can be loaded in a
3356 single instruction, and we can find a temporary to put it in,
3357 then this can be done in two instructions instead of 3-4. */
3358 if (subtargets
3359 /* TARGET can't be NULL if SUBTARGETS is 0 */
3360 || (reload_completed && !reg_mentioned_p (target, source)))
3362 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3364 if (generate)
3366 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3368 emit_constant_insn (cond,
3369 gen_rtx_SET (VOIDmode, sub,
3370 GEN_INT (val)));
3371 emit_constant_insn (cond,
3372 gen_rtx_SET (VOIDmode, target,
3373 gen_rtx_fmt_ee (code, mode,
3374 source, sub)));
3376 return 2;
3380 if (code == XOR)
3381 break;
3383 /* Convert
3384 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3385 followed by 0s, e.g. 0xfff00000) into
3386 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3388 This can be done in 2 instructions by using shifts with mov or mvn.
3389 e.g. for
3390 x = x | 0xfff00000;
3391 we generate:
3392 mvn r0, r0, asl #12
3393 mvn r0, r0, lsr #12 */
3394 if (set_sign_bit_copies > 8
3395 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3397 if (generate)
3399 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3400 rtx shift = GEN_INT (set_sign_bit_copies);
3402 emit_constant_insn
3403 (cond,
3404 gen_rtx_SET (VOIDmode, sub,
3405 gen_rtx_NOT (mode,
3406 gen_rtx_ASHIFT (mode,
3407 source,
3408 shift))));
3409 emit_constant_insn
3410 (cond,
3411 gen_rtx_SET (VOIDmode, target,
3412 gen_rtx_NOT (mode,
3413 gen_rtx_LSHIFTRT (mode, sub,
3414 shift))));
3416 return 2;
3419 /* Convert
3420 x = y | constant (which has set_zero_bit_copies trailing ones) into
3422 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3424 e.g. for r0 = r0 | 0xfff
3425 mvn r0, r0, lsr #12
3426 mvn r0, r0, asl #12
3429 if (set_zero_bit_copies > 8
3430 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3432 if (generate)
3434 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3435 rtx shift = GEN_INT (set_zero_bit_copies);
3437 emit_constant_insn
3438 (cond,
3439 gen_rtx_SET (VOIDmode, sub,
3440 gen_rtx_NOT (mode,
3441 gen_rtx_LSHIFTRT (mode,
3442 source,
3443 shift))));
3444 emit_constant_insn
3445 (cond,
3446 gen_rtx_SET (VOIDmode, target,
3447 gen_rtx_NOT (mode,
3448 gen_rtx_ASHIFT (mode, sub,
3449 shift))));
3451 return 2;
3454 /* This will never be reached for Thumb2 because orn is a valid
3455 instruction. This is for Thumb1 and the ARM 32 bit cases.
3457 x = y | constant (such that ~constant is a valid constant)
3458 Transform this to
3459 x = ~(~y & ~constant).
3461 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3463 if (generate)
3465 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3466 emit_constant_insn (cond,
3467 gen_rtx_SET (VOIDmode, sub,
3468 gen_rtx_NOT (mode, source)));
3469 source = sub;
3470 if (subtargets)
3471 sub = gen_reg_rtx (mode);
3472 emit_constant_insn (cond,
3473 gen_rtx_SET (VOIDmode, sub,
3474 gen_rtx_AND (mode, source,
3475 GEN_INT (temp1))));
3476 emit_constant_insn (cond,
3477 gen_rtx_SET (VOIDmode, target,
3478 gen_rtx_NOT (mode, sub)));
3480 return 3;
3482 break;
3484 case AND:
 3485       /* See if two shifts will do 2 or more insns' worth of work.  */
3486 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3488 HOST_WIDE_INT shift_mask = ((0xffffffff
3489 << (32 - clear_sign_bit_copies))
3490 & 0xffffffff);
3492 if ((remainder | shift_mask) != 0xffffffff)
3494 if (generate)
3496 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3497 insns = arm_gen_constant (AND, mode, cond,
3498 remainder | shift_mask,
3499 new_src, source, subtargets, 1);
3500 source = new_src;
3502 else
3504 rtx targ = subtargets ? NULL_RTX : target;
3505 insns = arm_gen_constant (AND, mode, cond,
3506 remainder | shift_mask,
3507 targ, source, subtargets, 0);
3511 if (generate)
3513 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3514 rtx shift = GEN_INT (clear_sign_bit_copies);
3516 emit_insn (gen_ashlsi3 (new_src, source, shift));
3517 emit_insn (gen_lshrsi3 (target, new_src, shift));
3520 return insns + 2;
3523 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3525 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3527 if ((remainder | shift_mask) != 0xffffffff)
3529 if (generate)
3531 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3533 insns = arm_gen_constant (AND, mode, cond,
3534 remainder | shift_mask,
3535 new_src, source, subtargets, 1);
3536 source = new_src;
3538 else
3540 rtx targ = subtargets ? NULL_RTX : target;
3542 insns = arm_gen_constant (AND, mode, cond,
3543 remainder | shift_mask,
3544 targ, source, subtargets, 0);
3548 if (generate)
3550 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3551 rtx shift = GEN_INT (clear_zero_bit_copies);
3553 emit_insn (gen_lshrsi3 (new_src, source, shift));
3554 emit_insn (gen_ashlsi3 (target, new_src, shift));
3557 return insns + 2;
3560 break;
3562 default:
3563 break;
3566 /* Calculate what the instruction sequences would be if we generated it
3567 normally, negated, or inverted. */
3568 if (code == AND)
3569 /* AND cannot be split into multiple insns, so invert and use BIC. */
3570 insns = 99;
3571 else
3572 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3574 if (can_negate)
3575 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3576 &neg_immediates);
3577 else
3578 neg_insns = 99;
3580 if (can_invert || final_invert)
3581 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3582 &inv_immediates);
3583 else
3584 inv_insns = 99;
3586 immediates = &pos_immediates;
3588 /* Is the negated immediate sequence more efficient? */
3589 if (neg_insns < insns && neg_insns <= inv_insns)
3591 insns = neg_insns;
3592 immediates = &neg_immediates;
3594 else
3595 can_negate = 0;
3597 /* Is the inverted immediate sequence more efficient?
3598 We must allow for an extra NOT instruction for XOR operations, although
3599 there is some chance that the final 'mvn' will get optimized later. */
3600 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3602 insns = inv_insns;
3603 immediates = &inv_immediates;
3605 else
3607 can_invert = 0;
3608 final_invert = 0;
3611 /* Now output the chosen sequence as instructions. */
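  /* Worked example (illustrative; the exact chunk order may differ): for
     code == SET and the constant 0x12345678 no single valid immediate
     exists, so the chosen sequence is four rotated 8-bit chunks and the
     loop below emits roughly
         mov rD, #0x12000000
         add rD, rD, #0x00340000
         add rD, rD, #0x00005600
         add rD, rD, #0x00000078
     (ADD and ORR are interchangeable here since the chunks do not
     overlap).  */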
3612 if (generate)
3614 for (i = 0; i < insns; i++)
3616 rtx new_src, temp1_rtx;
3618 temp1 = immediates->i[i];
3620 if (code == SET || code == MINUS)
3621 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3622 else if ((final_invert || i < (insns - 1)) && subtargets)
3623 new_src = gen_reg_rtx (mode);
3624 else
3625 new_src = target;
3627 if (can_invert)
3628 temp1 = ~temp1;
3629 else if (can_negate)
3630 temp1 = -temp1;
3632 temp1 = trunc_int_for_mode (temp1, mode);
3633 temp1_rtx = GEN_INT (temp1);
3635 if (code == SET)
3637 else if (code == MINUS)
3638 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3639 else
3640 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3642 emit_constant_insn (cond,
3643 gen_rtx_SET (VOIDmode, new_src,
3644 temp1_rtx));
3645 source = new_src;
3647 if (code == SET)
3649 can_negate = can_invert;
3650 can_invert = 0;
3651 code = PLUS;
3653 else if (code == MINUS)
3654 code = PLUS;
3658 if (final_invert)
3660 if (generate)
3661 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3662 gen_rtx_NOT (mode, source)));
3663 insns++;
3666 return insns;
3669 /* Canonicalize a comparison so that we are more likely to recognize it.
3670 This can be done for a few constant compares, where we can make the
3671 immediate value easier to load. */
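/* Example of the kind of rewrite performed here (illustrative): 0xfff is
   not a valid ARM immediate but 0x1000 is, so a comparison such as
   (x <= 0xfff) is adjusted to the equivalent (x < 0x1000), i.e. LE
   becomes LT with OP1 incremented.  */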
3673 static void
3674 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3675 bool op0_preserve_value)
3677 enum machine_mode mode;
3678 unsigned HOST_WIDE_INT i, maxval;
3680 mode = GET_MODE (*op0);
3681 if (mode == VOIDmode)
3682 mode = GET_MODE (*op1);
3684 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3686 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3687 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3688 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3689 for GTU/LEU in Thumb mode. */
3690 if (mode == DImode)
3692 rtx tem;
3694 if (*code == GT || *code == LE
3695 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3697 /* Missing comparison. First try to use an available
3698 comparison. */
3699 if (CONST_INT_P (*op1))
3701 i = INTVAL (*op1);
3702 switch (*code)
3704 case GT:
3705 case LE:
3706 if (i != maxval
3707 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3709 *op1 = GEN_INT (i + 1);
3710 *code = *code == GT ? GE : LT;
3711 return;
3713 break;
3714 case GTU:
3715 case LEU:
3716 if (i != ~((unsigned HOST_WIDE_INT) 0)
3717 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3719 *op1 = GEN_INT (i + 1);
3720 *code = *code == GTU ? GEU : LTU;
3721 return;
3723 break;
3724 default:
3725 gcc_unreachable ();
3729 /* If that did not work, reverse the condition. */
3730 if (!op0_preserve_value)
3732 tem = *op0;
3733 *op0 = *op1;
3734 *op1 = tem;
3735 *code = (int)swap_condition ((enum rtx_code)*code);
3738 return;
3741 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3742 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3743 to facilitate possible combining with a cmp into 'ands'. */
3744 if (mode == SImode
3745 && GET_CODE (*op0) == ZERO_EXTEND
3746 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3747 && GET_MODE (XEXP (*op0, 0)) == QImode
3748 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3749 && subreg_lowpart_p (XEXP (*op0, 0))
3750 && *op1 == const0_rtx)
3751 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3752 GEN_INT (255));
3754 /* Comparisons smaller than DImode. Only adjust comparisons against
3755 an out-of-range constant. */
3756 if (!CONST_INT_P (*op1)
3757 || const_ok_for_arm (INTVAL (*op1))
3758 || const_ok_for_arm (- INTVAL (*op1)))
3759 return;
3761 i = INTVAL (*op1);
3763 switch (*code)
3765 case EQ:
3766 case NE:
3767 return;
3769 case GT:
3770 case LE:
3771 if (i != maxval
3772 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3774 *op1 = GEN_INT (i + 1);
3775 *code = *code == GT ? GE : LT;
3776 return;
3778 break;
3780 case GE:
3781 case LT:
3782 if (i != ~maxval
3783 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3785 *op1 = GEN_INT (i - 1);
3786 *code = *code == GE ? GT : LE;
3787 return;
3789 break;
3791 case GTU:
3792 case LEU:
3793 if (i != ~((unsigned HOST_WIDE_INT) 0)
3794 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3796 *op1 = GEN_INT (i + 1);
3797 *code = *code == GTU ? GEU : LTU;
3798 return;
3800 break;
3802 case GEU:
3803 case LTU:
3804 if (i != 0
3805 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3807 *op1 = GEN_INT (i - 1);
3808 *code = *code == GEU ? GTU : LEU;
3809 return;
3811 break;
3813 default:
3814 gcc_unreachable ();
3819 /* Define how to find the value returned by a function. */
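/* Informal summary (illustrative): under the base AAPCS an int result
   comes back in r0 and a double in r0/r1, while with the VFP variant
   (-mfloat-abi=hard) float/double results are returned in s0/d0; the
   AAPCS details are handled by aapcs_allocate_return_reg.  */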
3821 static rtx
3822 arm_function_value(const_tree type, const_tree func,
3823 bool outgoing ATTRIBUTE_UNUSED)
3825 enum machine_mode mode;
3826 int unsignedp ATTRIBUTE_UNUSED;
3827 rtx r ATTRIBUTE_UNUSED;
3829 mode = TYPE_MODE (type);
3831 if (TARGET_AAPCS_BASED)
3832 return aapcs_allocate_return_reg (mode, type, func);
3834 /* Promote integer types. */
3835 if (INTEGRAL_TYPE_P (type))
3836 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3838 /* Promotes small structs returned in a register to full-word size
3839 for big-endian AAPCS. */
3840 if (arm_return_in_msb (type))
3842 HOST_WIDE_INT size = int_size_in_bytes (type);
3843 if (size % UNITS_PER_WORD != 0)
3845 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3846 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3850 return arm_libcall_value_1 (mode);
3853 static int
3854 libcall_eq (const void *p1, const void *p2)
3856 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3859 static hashval_t
3860 libcall_hash (const void *p1)
3862 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3865 static void
3866 add_libcall (htab_t htab, rtx libcall)
3868 *htab_find_slot (htab, libcall, INSERT) = libcall;
3871 static bool
3872 arm_libcall_uses_aapcs_base (const_rtx libcall)
3874 static bool init_done = false;
3875 static htab_t libcall_htab;
3877 if (!init_done)
3879 init_done = true;
3881 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3882 NULL);
3883 add_libcall (libcall_htab,
3884 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3885 add_libcall (libcall_htab,
3886 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3887 add_libcall (libcall_htab,
3888 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3889 add_libcall (libcall_htab,
3890 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3892 add_libcall (libcall_htab,
3893 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3894 add_libcall (libcall_htab,
3895 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3896 add_libcall (libcall_htab,
3897 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3898 add_libcall (libcall_htab,
3899 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3901 add_libcall (libcall_htab,
3902 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3903 add_libcall (libcall_htab,
3904 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3905 add_libcall (libcall_htab,
3906 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3907 add_libcall (libcall_htab,
3908 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3909 add_libcall (libcall_htab,
3910 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3911 add_libcall (libcall_htab,
3912 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3913 add_libcall (libcall_htab,
3914 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3915 add_libcall (libcall_htab,
3916 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3918 /* Values from double-precision helper functions are returned in core
3919 registers if the selected core only supports single-precision
3920 arithmetic, even if we are using the hard-float ABI. The same is
3921 true for single-precision helpers, but we will never be using the
3922 hard-float ABI on a CPU which doesn't support single-precision
3923 operations in hardware. */
3924 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3925 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3926 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3927 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3928 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3929 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3930 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3931 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3932 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3933 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3934 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3935 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3936 SFmode));
3937 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3938 DFmode));
3941 return libcall && htab_find (libcall_htab, libcall) != NULL;
3944 static rtx
3945 arm_libcall_value_1 (enum machine_mode mode)
3947 if (TARGET_AAPCS_BASED)
3948 return aapcs_libcall_value (mode);
3949 else if (TARGET_IWMMXT_ABI
3950 && arm_vector_mode_supported_p (mode))
3951 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3952 else
3953 return gen_rtx_REG (mode, ARG_REGISTER (1));
3956 /* Define how to find the value returned by a library function
3957 assuming the value has mode MODE. */
3959 static rtx
3960 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3962 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3963 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3965 /* The following libcalls return their result in integer registers,
3966 even though they return a floating point value. */
3967 if (arm_libcall_uses_aapcs_base (libcall))
3968 return gen_rtx_REG (mode, ARG_REGISTER(1));
3972 return arm_libcall_value_1 (mode);
3975 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3977 static bool
3978 arm_function_value_regno_p (const unsigned int regno)
3980 if (regno == ARG_REGISTER (1)
3981 || (TARGET_32BIT
3982 && TARGET_AAPCS_BASED
3983 && TARGET_VFP
3984 && TARGET_HARD_FLOAT
3985 && regno == FIRST_VFP_REGNUM)
3986 || (TARGET_IWMMXT_ABI
3987 && regno == FIRST_IWMMXT_REGNUM))
3988 return true;
3990 return false;
3993 /* Determine the amount of memory needed to store the possible return
3994 registers of an untyped call. */
3996 arm_apply_result_size (void)
3998 int size = 16;
4000 if (TARGET_32BIT)
4002 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4003 size += 32;
4004 if (TARGET_IWMMXT_ABI)
4005 size += 8;
4008 return size;
4011 /* Decide whether TYPE should be returned in memory (true)
4012 or in a register (false). FNTYPE is the type of the function making
4013 the call. */
4014 static bool
4015 arm_return_in_memory (const_tree type, const_tree fntype)
4017 HOST_WIDE_INT size;
4019 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4021 if (TARGET_AAPCS_BASED)
 4023       /* Simple, non-aggregate types (i.e. not including vectors and
4024 complex) are always returned in a register (or registers).
4025 We don't care about which register here, so we can short-cut
4026 some of the detail. */
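      /* Examples (illustrative):
           struct { short s; }      2 bytes -> returned in r0 (false);
           struct { int a, b; }     8 bytes -> returned in memory (true);
           struct { double d[2]; }  with the VFP PCS variant is a
             homogeneous aggregate and is returned in d0/d1 (false).  */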
4027 if (!AGGREGATE_TYPE_P (type)
4028 && TREE_CODE (type) != VECTOR_TYPE
4029 && TREE_CODE (type) != COMPLEX_TYPE)
4030 return false;
4032 /* Any return value that is no larger than one word can be
4033 returned in r0. */
4034 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4035 return false;
4037 /* Check any available co-processors to see if they accept the
4038 type as a register candidate (VFP, for example, can return
4039 some aggregates in consecutive registers). These aren't
4040 available if the call is variadic. */
4041 if (aapcs_select_return_coproc (type, fntype) >= 0)
4042 return false;
4044 /* Vector values should be returned using ARM registers, not
4045 memory (unless they're over 16 bytes, which will break since
4046 we only have four call-clobbered registers to play with). */
4047 if (TREE_CODE (type) == VECTOR_TYPE)
4048 return (size < 0 || size > (4 * UNITS_PER_WORD));
4050 /* The rest go in memory. */
4051 return true;
4054 if (TREE_CODE (type) == VECTOR_TYPE)
4055 return (size < 0 || size > (4 * UNITS_PER_WORD));
4057 if (!AGGREGATE_TYPE_P (type) &&
4058 (TREE_CODE (type) != VECTOR_TYPE))
4059 /* All simple types are returned in registers. */
4060 return false;
4062 if (arm_abi != ARM_ABI_APCS)
4064 /* ATPCS and later return aggregate types in memory only if they are
4065 larger than a word (or are variable size). */
4066 return (size < 0 || size > UNITS_PER_WORD);
4069 /* For the arm-wince targets we choose to be compatible with Microsoft's
4070 ARM and Thumb compilers, which always return aggregates in memory. */
4071 #ifndef ARM_WINCE
4072 /* All structures/unions bigger than one word are returned in memory.
4073 Also catch the case where int_size_in_bytes returns -1. In this case
4074 the aggregate is either huge or of variable size, and in either case
4075 we will want to return it via memory and not in a register. */
4076 if (size < 0 || size > UNITS_PER_WORD)
4077 return true;
4079 if (TREE_CODE (type) == RECORD_TYPE)
4081 tree field;
4083 /* For a struct the APCS says that we only return in a register
4084 if the type is 'integer like' and every addressable element
4085 has an offset of zero. For practical purposes this means
4086 that the structure can have at most one non bit-field element
4087 and that this element must be the first one in the structure. */
4089 /* Find the first field, ignoring non FIELD_DECL things which will
4090 have been created by C++. */
4091 for (field = TYPE_FIELDS (type);
4092 field && TREE_CODE (field) != FIELD_DECL;
4093 field = DECL_CHAIN (field))
4094 continue;
4096 if (field == NULL)
4097 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4099 /* Check that the first field is valid for returning in a register. */
4101 /* ... Floats are not allowed */
4102 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4103 return true;
4105 /* ... Aggregates that are not themselves valid for returning in
4106 a register are not allowed. */
4107 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4108 return true;
4110 /* Now check the remaining fields, if any. Only bitfields are allowed,
4111 since they are not addressable. */
4112 for (field = DECL_CHAIN (field);
4113 field;
4114 field = DECL_CHAIN (field))
4116 if (TREE_CODE (field) != FIELD_DECL)
4117 continue;
4119 if (!DECL_BIT_FIELD_TYPE (field))
4120 return true;
4123 return false;
4126 if (TREE_CODE (type) == UNION_TYPE)
4128 tree field;
4130 /* Unions can be returned in registers if every element is
4131 integral, or can be returned in an integer register. */
4132 for (field = TYPE_FIELDS (type);
4133 field;
4134 field = DECL_CHAIN (field))
4136 if (TREE_CODE (field) != FIELD_DECL)
4137 continue;
4139 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4140 return true;
4142 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4143 return true;
4146 return false;
4148 #endif /* not ARM_WINCE */
4150 /* Return all other types in memory. */
4151 return true;
4154 const struct pcs_attribute_arg
4156 const char *arg;
4157 enum arm_pcs value;
4158 } pcs_attribute_args[] =
4160 {"aapcs", ARM_PCS_AAPCS},
4161 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4162 #if 0
4163 /* We could recognize these, but changes would be needed elsewhere
4164 * to implement them. */
4165 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4166 {"atpcs", ARM_PCS_ATPCS},
4167 {"apcs", ARM_PCS_APCS},
4168 #endif
4169 {NULL, ARM_PCS_UNKNOWN}
4172 static enum arm_pcs
4173 arm_pcs_from_attribute (tree attr)
4175 const struct pcs_attribute_arg *ptr;
4176 const char *arg;
4178 /* Get the value of the argument. */
4179 if (TREE_VALUE (attr) == NULL_TREE
4180 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4181 return ARM_PCS_UNKNOWN;
4183 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4185 /* Check it against the list of known arguments. */
4186 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4187 if (streq (arg, ptr->arg))
4188 return ptr->value;
 4190   /* An unrecognized PCS variant name.  */
4191 return ARM_PCS_UNKNOWN;
4194 /* Get the PCS variant to use for this call. TYPE is the function's type
 4195    specification, DECL is the specific declaration.  DECL may be null if
4196 the call could be indirect or if this is a library call. */
4197 static enum arm_pcs
4198 arm_get_pcs_model (const_tree type, const_tree decl)
4200 bool user_convention = false;
4201 enum arm_pcs user_pcs = arm_pcs_default;
4202 tree attr;
4204 gcc_assert (type);
4206 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4207 if (attr)
4209 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4210 user_convention = true;
4213 if (TARGET_AAPCS_BASED)
4215 /* Detect varargs functions. These always use the base rules
4216 (no argument is ever a candidate for a co-processor
4217 register). */
4218 bool base_rules = stdarg_p (type);
4220 if (user_convention)
4222 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4223 sorry ("non-AAPCS derived PCS variant");
4224 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4225 error ("variadic functions must use the base AAPCS variant");
4228 if (base_rules)
4229 return ARM_PCS_AAPCS;
4230 else if (user_convention)
4231 return user_pcs;
4232 else if (decl && flag_unit_at_a_time)
4234 /* Local functions never leak outside this compilation unit,
4235 so we are free to use whatever conventions are
4236 appropriate. */
4237 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4238 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4239 if (i && i->local)
4240 return ARM_PCS_AAPCS_LOCAL;
4243 else if (user_convention && user_pcs != arm_pcs_default)
4244 sorry ("PCS variant");
4246 /* For everything else we use the target's default. */
4247 return arm_pcs_default;
4251 static void
4252 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4253 const_tree fntype ATTRIBUTE_UNUSED,
4254 rtx libcall ATTRIBUTE_UNUSED,
4255 const_tree fndecl ATTRIBUTE_UNUSED)
4257 /* Record the unallocated VFP registers. */
4258 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4259 pcum->aapcs_vfp_reg_alloc = 0;
4262 /* Walk down the type tree of TYPE counting consecutive base elements.
4263 If *MODEP is VOIDmode, then set it to the first valid floating point
4264 type. If a non-floating point type is found, or if a floating point
4265 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4266 otherwise return the count in the sub-tree. */
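/* Illustrative cases:
     struct { float x, y, z; }     -> *MODEP = SFmode, count 3;
     struct { double d[2]; }       -> *MODEP = DFmode, count 2;
     struct { float f; double d; } -> -1 (mixed base types);
   i.e. the AAPCS notion of a homogeneous floating-point aggregate.  */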
4267 static int
4268 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4270 enum machine_mode mode;
4271 HOST_WIDE_INT size;
4273 switch (TREE_CODE (type))
4275 case REAL_TYPE:
4276 mode = TYPE_MODE (type);
4277 if (mode != DFmode && mode != SFmode)
4278 return -1;
4280 if (*modep == VOIDmode)
4281 *modep = mode;
4283 if (*modep == mode)
4284 return 1;
4286 break;
4288 case COMPLEX_TYPE:
4289 mode = TYPE_MODE (TREE_TYPE (type));
4290 if (mode != DFmode && mode != SFmode)
4291 return -1;
4293 if (*modep == VOIDmode)
4294 *modep = mode;
4296 if (*modep == mode)
4297 return 2;
4299 break;
4301 case VECTOR_TYPE:
4302 /* Use V2SImode and V4SImode as representatives of all 64-bit
4303 and 128-bit vector types, whether or not those modes are
4304 supported with the present options. */
4305 size = int_size_in_bytes (type);
4306 switch (size)
4308 case 8:
4309 mode = V2SImode;
4310 break;
4311 case 16:
4312 mode = V4SImode;
4313 break;
4314 default:
4315 return -1;
4318 if (*modep == VOIDmode)
4319 *modep = mode;
4321 /* Vector modes are considered to be opaque: two vectors are
4322 equivalent for the purposes of being homogeneous aggregates
4323 if they are the same size. */
4324 if (*modep == mode)
4325 return 1;
4327 break;
4329 case ARRAY_TYPE:
4331 int count;
4332 tree index = TYPE_DOMAIN (type);
4334 /* Can't handle incomplete types. */
4335 if (!COMPLETE_TYPE_P (type))
4336 return -1;
4338 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4339 if (count == -1
4340 || !index
4341 || !TYPE_MAX_VALUE (index)
4342 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4343 || !TYPE_MIN_VALUE (index)
4344 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4345 || count < 0)
4346 return -1;
4348 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4349 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4351 /* There must be no padding. */
4352 if (!host_integerp (TYPE_SIZE (type), 1)
4353 || (tree_low_cst (TYPE_SIZE (type), 1)
4354 != count * GET_MODE_BITSIZE (*modep)))
4355 return -1;
4357 return count;
4360 case RECORD_TYPE:
4362 int count = 0;
4363 int sub_count;
4364 tree field;
4366 /* Can't handle incomplete types. */
4367 if (!COMPLETE_TYPE_P (type))
4368 return -1;
4370 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4372 if (TREE_CODE (field) != FIELD_DECL)
4373 continue;
4375 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4376 if (sub_count < 0)
4377 return -1;
4378 count += sub_count;
4381 /* There must be no padding. */
4382 if (!host_integerp (TYPE_SIZE (type), 1)
4383 || (tree_low_cst (TYPE_SIZE (type), 1)
4384 != count * GET_MODE_BITSIZE (*modep)))
4385 return -1;
4387 return count;
4390 case UNION_TYPE:
4391 case QUAL_UNION_TYPE:
4393 /* These aren't very interesting except in a degenerate case. */
4394 int count = 0;
4395 int sub_count;
4396 tree field;
4398 /* Can't handle incomplete types. */
4399 if (!COMPLETE_TYPE_P (type))
4400 return -1;
4402 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4404 if (TREE_CODE (field) != FIELD_DECL)
4405 continue;
4407 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4408 if (sub_count < 0)
4409 return -1;
4410 count = count > sub_count ? count : sub_count;
4413 /* There must be no padding. */
4414 if (!host_integerp (TYPE_SIZE (type), 1)
4415 || (tree_low_cst (TYPE_SIZE (type), 1)
4416 != count * GET_MODE_BITSIZE (*modep)))
4417 return -1;
4419 return count;
4422 default:
4423 break;
4426 return -1;
4429 /* Return true if PCS_VARIANT should use VFP registers. */
4430 static bool
4431 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4433 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4435 static bool seen_thumb1_vfp = false;
4437 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4439 sorry ("Thumb-1 hard-float VFP ABI");
4440 /* sorry() is not immediately fatal, so only display this once. */
4441 seen_thumb1_vfp = true;
4444 return true;
4447 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4448 return false;
4450 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4451 (TARGET_VFP_DOUBLE || !is_double));
4454 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4455 suitable for passing or returning in VFP registers for the PCS
4456 variant selected. If it is, then *BASE_MODE is updated to contain
4457 a machine mode describing each element of the argument's type and
4458 *COUNT to hold the number of such elements. */
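/* For instance (illustrative): a plain double yields *BASE_MODE = DFmode
   and *COUNT = 1; a _Complex double yields DFmode and 2; a structure of
   four floats yields SFmode and 4; more than four elements, or a mix of
   element types, is rejected.  */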
4459 static bool
4460 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4461 enum machine_mode mode, const_tree type,
4462 enum machine_mode *base_mode, int *count)
4464 enum machine_mode new_mode = VOIDmode;
4466 /* If we have the type information, prefer that to working things
4467 out from the mode. */
4468 if (type)
4470 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4472 if (ag_count > 0 && ag_count <= 4)
4473 *count = ag_count;
4474 else
4475 return false;
4477 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4478 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4479 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4481 *count = 1;
4482 new_mode = mode;
4484 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4486 *count = 2;
4487 new_mode = (mode == DCmode ? DFmode : SFmode);
4489 else
4490 return false;
4493 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4494 return false;
4496 *base_mode = new_mode;
4497 return true;
4500 static bool
4501 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4502 enum machine_mode mode, const_tree type)
4504 int count ATTRIBUTE_UNUSED;
4505 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4507 if (!use_vfp_abi (pcs_variant, false))
4508 return false;
4509 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4510 &ag_mode, &count);
4513 static bool
4514 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4515 const_tree type)
4517 if (!use_vfp_abi (pcum->pcs_variant, false))
4518 return false;
4520 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4521 &pcum->aapcs_vfp_rmode,
4522 &pcum->aapcs_vfp_rcount);
4525 static bool
4526 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4527 const_tree type ATTRIBUTE_UNUSED)
4529 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4530 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4531 int regno;
4533 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4534 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4536 pcum->aapcs_vfp_reg_alloc = mask << regno;
4537 if (mode == BLKmode
4538 || (mode == TImode && ! TARGET_NEON)
4539 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4541 int i;
4542 int rcount = pcum->aapcs_vfp_rcount;
4543 int rshift = shift;
4544 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4545 rtx par;
4546 if (!TARGET_NEON)
4548 /* Avoid using unsupported vector modes. */
4549 if (rmode == V2SImode)
4550 rmode = DImode;
4551 else if (rmode == V4SImode)
4553 rmode = DImode;
4554 rcount *= 2;
4555 rshift /= 2;
4558 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4559 for (i = 0; i < rcount; i++)
4561 rtx tmp = gen_rtx_REG (rmode,
4562 FIRST_VFP_REGNUM + regno + i * rshift);
4563 tmp = gen_rtx_EXPR_LIST
4564 (VOIDmode, tmp,
4565 GEN_INT (i * GET_MODE_SIZE (rmode)));
4566 XVECEXP (par, 0, i) = tmp;
4569 pcum->aapcs_reg = par;
4571 else
4572 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4573 return true;
4575 return false;
4578 static rtx
4579 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4580 enum machine_mode mode,
4581 const_tree type ATTRIBUTE_UNUSED)
4583 if (!use_vfp_abi (pcs_variant, false))
4584 return NULL;
4586 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4588 int count;
4589 enum machine_mode ag_mode;
4590 int i;
4591 rtx par;
4592 int shift;
4594 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4595 &ag_mode, &count);
4597 if (!TARGET_NEON)
4599 if (ag_mode == V2SImode)
4600 ag_mode = DImode;
4601 else if (ag_mode == V4SImode)
4603 ag_mode = DImode;
4604 count *= 2;
4607 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4608 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4609 for (i = 0; i < count; i++)
4611 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4612 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4613 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4614 XVECEXP (par, 0, i) = tmp;
4617 return par;
4620 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4623 static void
4624 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4625 enum machine_mode mode ATTRIBUTE_UNUSED,
4626 const_tree type ATTRIBUTE_UNUSED)
4628 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4629 pcum->aapcs_vfp_reg_alloc = 0;
4630 return;
4633 #define AAPCS_CP(X) \
4635 aapcs_ ## X ## _cum_init, \
4636 aapcs_ ## X ## _is_call_candidate, \
4637 aapcs_ ## X ## _allocate, \
4638 aapcs_ ## X ## _is_return_candidate, \
4639 aapcs_ ## X ## _allocate_return_reg, \
4640 aapcs_ ## X ## _advance \
4643 /* Table of co-processors that can be used to pass arguments in
 4644    registers.  Ideally no argument should be a candidate for more than
4645 one co-processor table entry, but the table is processed in order
4646 and stops after the first match. If that entry then fails to put
4647 the argument into a co-processor register, the argument will go on
4648 the stack. */
4649 static struct
4651 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4652 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4654 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4655 BLKmode) is a candidate for this co-processor's registers; this
4656 function should ignore any position-dependent state in
4657 CUMULATIVE_ARGS and only use call-type dependent information. */
4658 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4660 /* Return true if the argument does get a co-processor register; it
4661 should set aapcs_reg to an RTX of the register allocated as is
4662 required for a return from FUNCTION_ARG. */
4663 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4665 /* Return true if a result of mode MODE (or type TYPE if MODE is
 4666      BLKmode) can be returned in this co-processor's registers.  */
4667 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4669 /* Allocate and return an RTX element to hold the return type of a
 4670      call; this routine must not fail and will only be called if
4671 is_return_candidate returned true with the same parameters. */
4672 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4674 /* Finish processing this argument and prepare to start processing
4675 the next one. */
4676 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4677 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4679 AAPCS_CP(vfp)
4682 #undef AAPCS_CP
4684 static int
4685 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4686 const_tree type)
4688 int i;
4690 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4691 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4692 return i;
4694 return -1;
4697 static int
4698 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4700 /* We aren't passed a decl, so we can't check that a call is local.
4701 However, it isn't clear that that would be a win anyway, since it
4702 might limit some tail-calling opportunities. */
4703 enum arm_pcs pcs_variant;
4705 if (fntype)
4707 const_tree fndecl = NULL_TREE;
4709 if (TREE_CODE (fntype) == FUNCTION_DECL)
4711 fndecl = fntype;
4712 fntype = TREE_TYPE (fntype);
4715 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4717 else
4718 pcs_variant = arm_pcs_default;
4720 if (pcs_variant != ARM_PCS_AAPCS)
4722 int i;
4724 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4725 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4726 TYPE_MODE (type),
4727 type))
4728 return i;
4730 return -1;
4733 static rtx
4734 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4735 const_tree fntype)
4737 /* We aren't passed a decl, so we can't check that a call is local.
4738 However, it isn't clear that that would be a win anyway, since it
4739 might limit some tail-calling opportunities. */
4740 enum arm_pcs pcs_variant;
4741 int unsignedp ATTRIBUTE_UNUSED;
4743 if (fntype)
4745 const_tree fndecl = NULL_TREE;
4747 if (TREE_CODE (fntype) == FUNCTION_DECL)
4749 fndecl = fntype;
4750 fntype = TREE_TYPE (fntype);
4753 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4755 else
4756 pcs_variant = arm_pcs_default;
4758 /* Promote integer types. */
4759 if (type && INTEGRAL_TYPE_P (type))
4760 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4762 if (pcs_variant != ARM_PCS_AAPCS)
4764 int i;
4766 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4767 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4768 type))
4769 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4770 mode, type);
4773 /* Promotes small structs returned in a register to full-word size
4774 for big-endian AAPCS. */
4775 if (type && arm_return_in_msb (type))
4777 HOST_WIDE_INT size = int_size_in_bytes (type);
4778 if (size % UNITS_PER_WORD != 0)
4780 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4781 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4785 return gen_rtx_REG (mode, R0_REGNUM);
4788 static rtx
4789 aapcs_libcall_value (enum machine_mode mode)
4791 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4792 && GET_MODE_SIZE (mode) <= 4)
4793 mode = SImode;
4795 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4798 /* Lay out a function argument using the AAPCS rules. The rule
4799 numbers referred to here are those in the AAPCS. */
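/* Worked example (illustrative; base AAPCS, soft-float):
     void f (int a, double b, int c);
       a -> r0          (C4: ncrn 0 -> 1)
       b -> r2/r3       (C3 rounds ncrn up to 2, then C4: ncrn -> 4)
       c -> the stack   (C6 onwards: no core registers left)  */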
4800 static void
4801 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4802 const_tree type, bool named)
4804 int nregs, nregs2;
4805 int ncrn;
4807 /* We only need to do this once per argument. */
4808 if (pcum->aapcs_arg_processed)
4809 return;
4811 pcum->aapcs_arg_processed = true;
4813 /* Special case: if named is false then we are handling an incoming
4814 anonymous argument which is on the stack. */
4815 if (!named)
4816 return;
4818 /* Is this a potential co-processor register candidate? */
4819 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4821 int slot = aapcs_select_call_coproc (pcum, mode, type);
4822 pcum->aapcs_cprc_slot = slot;
4824 /* We don't have to apply any of the rules from part B of the
4825 preparation phase, these are handled elsewhere in the
4826 compiler. */
4828 if (slot >= 0)
4830 /* A Co-processor register candidate goes either in its own
4831 class of registers or on the stack. */
4832 if (!pcum->aapcs_cprc_failed[slot])
4834 /* C1.cp - Try to allocate the argument to co-processor
4835 registers. */
4836 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4837 return;
4839 /* C2.cp - Put the argument on the stack and note that we
4840 can't assign any more candidates in this slot. We also
4841 need to note that we have allocated stack space, so that
4842 we won't later try to split a non-cprc candidate between
4843 core registers and the stack. */
4844 pcum->aapcs_cprc_failed[slot] = true;
4845 pcum->can_split = false;
4848 /* We didn't get a register, so this argument goes on the
4849 stack. */
4850 gcc_assert (pcum->can_split == false);
4851 return;
4855 /* C3 - For double-word aligned arguments, round the NCRN up to the
4856 next even number. */
4857 ncrn = pcum->aapcs_ncrn;
4858 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4859 ncrn++;
4861 nregs = ARM_NUM_REGS2(mode, type);
4863 /* Sigh, this test should really assert that nregs > 0, but a GCC
4864 extension allows empty structs and then gives them empty size; it
4865 then allows such a structure to be passed by value. For some of
4866 the code below we have to pretend that such an argument has
4867 non-zero size so that we 'locate' it correctly either in
4868 registers or on the stack. */
4869 gcc_assert (nregs >= 0);
4871 nregs2 = nregs ? nregs : 1;
4873 /* C4 - Argument fits entirely in core registers. */
4874 if (ncrn + nregs2 <= NUM_ARG_REGS)
4876 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4877 pcum->aapcs_next_ncrn = ncrn + nregs;
4878 return;
4881 /* C5 - Some core registers left and there are no arguments already
4882 on the stack: split this argument between the remaining core
4883 registers and the stack. */
4884 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4886 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4887 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4888 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4889 return;
4892 /* C6 - NCRN is set to 4. */
4893 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
 4895   /* C7, C8 - argument goes on the stack.  We have nothing to do here.  */
4896 return;
4899 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4900 for a call to a function whose data type is FNTYPE.
4901 For a library call, FNTYPE is NULL. */
4902 void
4903 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4904 rtx libname,
4905 tree fndecl ATTRIBUTE_UNUSED)
 4907   /* Determine the calling convention (PCS variant) in effect for this call.  */
4908 if (fntype)
4909 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4910 else
4911 pcum->pcs_variant = arm_pcs_default;
4913 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4915 if (arm_libcall_uses_aapcs_base (libname))
4916 pcum->pcs_variant = ARM_PCS_AAPCS;
4918 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4919 pcum->aapcs_reg = NULL_RTX;
4920 pcum->aapcs_partial = 0;
4921 pcum->aapcs_arg_processed = false;
4922 pcum->aapcs_cprc_slot = -1;
4923 pcum->can_split = true;
4925 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4927 int i;
4929 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4931 pcum->aapcs_cprc_failed[i] = false;
4932 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4935 return;
4938 /* Legacy ABIs */
4940 /* On the ARM, the offset starts at 0. */
4941 pcum->nregs = 0;
4942 pcum->iwmmxt_nregs = 0;
4943 pcum->can_split = true;
4945 /* Varargs vectors are treated the same as long long.
4946 named_count avoids having to change the way arm handles 'named' */
4947 pcum->named_count = 0;
4948 pcum->nargs = 0;
4950 if (TARGET_REALLY_IWMMXT && fntype)
4952 tree fn_arg;
4954 for (fn_arg = TYPE_ARG_TYPES (fntype);
4955 fn_arg;
4956 fn_arg = TREE_CHAIN (fn_arg))
4957 pcum->named_count += 1;
4959 if (! pcum->named_count)
4960 pcum->named_count = INT_MAX;
4965 /* Return true if mode/type need doubleword alignment. */
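/* E.g. (illustrative, AAPCS): long long and double have 64-bit alignment,
   which exceeds PARM_BOUNDARY (32 on ARM), so they answer true and land
   in even/odd register pairs or 8-byte aligned stack slots; int answers
   false.  */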
4966 static bool
4967 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4969 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4970 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4974 /* Determine where to put an argument to a function.
4975 Value is zero to push the argument on the stack,
4976 or a hard register in which to store the argument.
4978 MODE is the argument's machine mode.
4979 TYPE is the data type of the argument (as a tree).
4980 This is null for libcalls where that information may
4981 not be available.
4982 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4983 the preceding args and about the function being called.
4984 NAMED is nonzero if this argument is a named parameter
4985 (otherwise it is an extra parameter matching an ellipsis).
4987 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4988 other arguments are passed on the stack. If (NAMED == 0) (which happens
4989 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4990 defined), say it is passed in the stack (function_prologue will
4991 indeed make it pass in the stack if necessary). */
4993 static rtx
4994 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4995 const_tree type, bool named)
4997 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4998 int nregs;
5000 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5001 a call insn (op3 of a call_value insn). */
5002 if (mode == VOIDmode)
5003 return const0_rtx;
5005 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5007 aapcs_layout_arg (pcum, mode, type, named);
5008 return pcum->aapcs_reg;
5011 /* Varargs vectors are treated the same as long long.
5012 named_count avoids having to change the way arm handles 'named' */
5013 if (TARGET_IWMMXT_ABI
5014 && arm_vector_mode_supported_p (mode)
5015 && pcum->named_count > pcum->nargs + 1)
5017 if (pcum->iwmmxt_nregs <= 9)
5018 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5019 else
5021 pcum->can_split = false;
5022 return NULL_RTX;
5026 /* Put doubleword aligned quantities in even register pairs. */
5027 if (pcum->nregs & 1
5028 && ARM_DOUBLEWORD_ALIGN
5029 && arm_needs_doubleword_align (mode, type))
5030 pcum->nregs++;
5032 /* Only allow splitting an arg between regs and memory if all preceding
5033 args were allocated to regs. For args passed by reference we only count
5034 the reference pointer. */
5035 if (pcum->can_split)
5036 nregs = 1;
5037 else
5038 nregs = ARM_NUM_REGS2 (mode, type);
5040 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5041 return NULL_RTX;
5043 return gen_rtx_REG (mode, pcum->nregs);
5046 static unsigned int
5047 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5049 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5050 ? DOUBLEWORD_ALIGNMENT
5051 : PARM_BOUNDARY);
5054 static int
5055 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5056 tree type, bool named)
5058 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5059 int nregs = pcum->nregs;
5061 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5063 aapcs_layout_arg (pcum, mode, type, named);
5064 return pcum->aapcs_partial;
5067 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5068 return 0;
5070 if (NUM_ARG_REGS > nregs
5071 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5072 && pcum->can_split)
5073 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5075 return 0;
5078 /* Update the data in PCUM to advance over an argument
5079 of mode MODE and data type TYPE.
5080 (TYPE is null for libcalls where that information may not be available.) */
5082 static void
5083 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5084 const_tree type, bool named)
5086 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5088 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5090 aapcs_layout_arg (pcum, mode, type, named);
5092 if (pcum->aapcs_cprc_slot >= 0)
5094 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5095 type);
5096 pcum->aapcs_cprc_slot = -1;
5099 /* Generic stuff. */
5100 pcum->aapcs_arg_processed = false;
5101 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5102 pcum->aapcs_reg = NULL_RTX;
5103 pcum->aapcs_partial = 0;
5105 else
5107 pcum->nargs += 1;
5108 if (arm_vector_mode_supported_p (mode)
5109 && pcum->named_count > pcum->nargs
5110 && TARGET_IWMMXT_ABI)
5111 pcum->iwmmxt_nregs += 1;
5112 else
5113 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5117 /* Variable sized types are passed by reference. This is a GCC
5118 extension to the ARM ABI. */
5120 static bool
5121 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5122 enum machine_mode mode ATTRIBUTE_UNUSED,
5123 const_tree type, bool named ATTRIBUTE_UNUSED)
5125 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5128 /* Encode the current state of the #pragma [no_]long_calls. */
5129 typedef enum
5131 OFF, /* No #pragma [no_]long_calls is in effect. */
5132 LONG, /* #pragma long_calls is in effect. */
5133 SHORT /* #pragma no_long_calls is in effect. */
5134 } arm_pragma_enum;
5136 static arm_pragma_enum arm_pragma_long_calls = OFF;
5138 void
5139 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5141 arm_pragma_long_calls = LONG;
5144 void
5145 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5147 arm_pragma_long_calls = SHORT;
5150 void
5151 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5153 arm_pragma_long_calls = OFF;
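/* Typical use from source code (illustrative):
       #pragma long_calls
       void far_away (void);    (calls to this use a full 32-bit address)
       #pragma long_calls_off
   The state recorded here is picked up by arm_set_default_type_attributes
   when the affected function types are created.  */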
5156 /* Handle an attribute requiring a FUNCTION_DECL;
5157 arguments as in struct attribute_spec.handler. */
5158 static tree
5159 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5160 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5162 if (TREE_CODE (*node) != FUNCTION_DECL)
5164 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5165 name);
5166 *no_add_attrs = true;
5169 return NULL_TREE;
5172 /* Handle an "interrupt" or "isr" attribute;
5173 arguments as in struct attribute_spec.handler. */
5174 static tree
5175 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5176 bool *no_add_attrs)
5178 if (DECL_P (*node))
5180 if (TREE_CODE (*node) != FUNCTION_DECL)
5182 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5183 name);
5184 *no_add_attrs = true;
5186 /* FIXME: the argument if any is checked for type attributes;
5187 should it be checked for decl ones? */
5189 else
5191 if (TREE_CODE (*node) == FUNCTION_TYPE
5192 || TREE_CODE (*node) == METHOD_TYPE)
5194 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5196 warning (OPT_Wattributes, "%qE attribute ignored",
5197 name);
5198 *no_add_attrs = true;
5201 else if (TREE_CODE (*node) == POINTER_TYPE
5202 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5203 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5204 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5206 *node = build_variant_type_copy (*node);
5207 TREE_TYPE (*node) = build_type_attribute_variant
5208 (TREE_TYPE (*node),
5209 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5210 *no_add_attrs = true;
5212 else
5214 /* Possibly pass this attribute on from the type to a decl. */
5215 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5216 | (int) ATTR_FLAG_FUNCTION_NEXT
5217 | (int) ATTR_FLAG_ARRAY_NEXT))
5219 *no_add_attrs = true;
5220 return tree_cons (name, args, NULL_TREE);
5222 else
5224 warning (OPT_Wattributes, "%qE attribute ignored",
5225 name);
5230 return NULL_TREE;
5233 /* Handle a "pcs" attribute; arguments as in struct
5234 attribute_spec.handler. */
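/* Accepted usage (illustrative):
       double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   Any string not listed in pcs_attribute_args is diagnosed and the
   attribute is dropped.  */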
5235 static tree
5236 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5237 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5239 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5241 warning (OPT_Wattributes, "%qE attribute ignored", name);
5242 *no_add_attrs = true;
5244 return NULL_TREE;
5247 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5248 /* Handle the "notshared" attribute. This attribute is another way of
5249 requesting hidden visibility. ARM's compiler supports
5250 "__declspec(notshared)"; we support the same thing via an
5251 attribute. */
5253 static tree
5254 arm_handle_notshared_attribute (tree *node,
5255 tree name ATTRIBUTE_UNUSED,
5256 tree args ATTRIBUTE_UNUSED,
5257 int flags ATTRIBUTE_UNUSED,
5258 bool *no_add_attrs)
5260 tree decl = TYPE_NAME (*node);
5262 if (decl)
5264 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5265 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5266 *no_add_attrs = false;
5268 return NULL_TREE;
5270 #endif
5272 /* Return 0 if the attributes for two types are incompatible, 1 if they
5273 are compatible, and 2 if they are nearly compatible (which causes a
5274 warning to be generated). */
5275 static int
5276 arm_comp_type_attributes (const_tree type1, const_tree type2)
5278 int l1, l2, s1, s2;
5280 /* Check for mismatch of non-default calling convention. */
5281 if (TREE_CODE (type1) != FUNCTION_TYPE)
5282 return 1;
5284 /* Check for mismatched call attributes. */
5285 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5286 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5287 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5288 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5290 /* Only bother to check if an attribute is defined. */
5291 if (l1 | l2 | s1 | s2)
5293 /* If one type has an attribute, the other must have the same attribute. */
5294 if ((l1 != l2) || (s1 != s2))
5295 return 0;
5297 /* Disallow mixed attributes. */
5298 if ((l1 & s2) || (l2 & s1))
5299 return 0;
5302 /* Check for mismatched ISR attribute. */
5303 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5304 if (! l1)
5305 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5306 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5307 if (! l2)
 5308     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5309 if (l1 != l2)
5310 return 0;
5312 return 1;
5315 /* Assigns default attributes to newly defined type. This is used to
5316 set short_call/long_call attributes for function types of
5317 functions defined inside corresponding #pragma scopes. */
5318 static void
5319 arm_set_default_type_attributes (tree type)
 5321   /* Add __attribute__ ((long_call)) to all functions when inside
 5322      #pragma long_calls, or __attribute__ ((short_call)) when inside
 5323      #pragma no_long_calls.  */
5324 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5326 tree type_attr_list, attr_name;
5327 type_attr_list = TYPE_ATTRIBUTES (type);
5329 if (arm_pragma_long_calls == LONG)
5330 attr_name = get_identifier ("long_call");
5331 else if (arm_pragma_long_calls == SHORT)
5332 attr_name = get_identifier ("short_call");
5333 else
5334 return;
5336 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5337 TYPE_ATTRIBUTES (type) = type_attr_list;
5341 /* Return true if DECL is known to be linked into section SECTION. */
5343 static bool
5344 arm_function_in_section_p (tree decl, section *section)
5346 /* We can only be certain about functions defined in the same
5347 compilation unit. */
5348 if (!TREE_STATIC (decl))
5349 return false;
5351 /* Make sure that SYMBOL always binds to the definition in this
5352 compilation unit. */
5353 if (!targetm.binds_local_p (decl))
5354 return false;
5356 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5357 if (!DECL_SECTION_NAME (decl))
5359 /* Make sure that we will not create a unique section for DECL. */
5360 if (flag_function_sections || DECL_ONE_ONLY (decl))
5361 return false;
5364 return function_section (decl) == section;
5367 /* Return nonzero if a 32-bit "long_call" should be generated for
5368 a call from the current function to DECL. We generate a long_call
5369 if the function:
 5371      a.  has an __attribute__ ((long_call))
5372 or b. is within the scope of a #pragma long_calls
5373 or c. the -mlong-calls command line switch has been specified
5375 However we do not generate a long call if the function:
5377 d. has an __attribute__ ((short_call))
5378 or e. is inside the scope of a #pragma no_long_calls
5379 or f. is defined in the same section as the current function. */
5381 bool
5382 arm_is_long_call_p (tree decl)
5384 tree attrs;
5386 if (!decl)
5387 return TARGET_LONG_CALLS;
5389 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5390 if (lookup_attribute ("short_call", attrs))
5391 return false;
5393 /* For "f", be conservative, and only cater for cases in which the
5394 whole of the current function is placed in the same section. */
5395 if (!flag_reorder_blocks_and_partition
5396 && TREE_CODE (decl) == FUNCTION_DECL
5397 && arm_function_in_section_p (decl, current_function_section ()))
5398 return false;
5400 if (lookup_attribute ("long_call", attrs))
5401 return true;
5403 return TARGET_LONG_CALLS;
5406 /* Return nonzero if it is ok to make a tail-call to DECL. */
5407 static bool
5408 arm_function_ok_for_sibcall (tree decl, tree exp)
5410 unsigned long func_type;
5412 if (cfun->machine->sibcall_blocked)
5413 return false;
5415 /* Never tailcall something if we are generating code for Thumb-1. */
5416 if (TARGET_THUMB1)
5417 return false;
5419 /* The PIC register is live on entry to VxWorks PLT entries, so we
5420 must make the call before restoring the PIC register. */
5421 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5422 return false;
5424 /* Cannot tail-call to long calls, since these are out of range of
5425 a branch instruction. */
5426 if (decl && arm_is_long_call_p (decl))
5427 return false;
5429 /* If we are interworking and the function is not declared static
5430 then we can't tail-call it unless we know that it exists in this
5431 compilation unit (since it might be a Thumb routine). */
5432 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5433 && !TREE_ASM_WRITTEN (decl))
5434 return false;
5436 func_type = arm_current_func_type ();
5437 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5438 if (IS_INTERRUPT (func_type))
5439 return false;
5441 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5443 /* Check that the return value locations are the same. For
5444 example that we aren't returning a value from the sibling in
5445 a VFP register but then need to transfer it to a core
5446 register. */
5447 rtx a, b;
5449 a = arm_function_value (TREE_TYPE (exp), decl, false);
5450 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5451 cfun->decl, false);
5452 if (!rtx_equal_p (a, b))
5453 return false;
5456 /* Never tailcall if function may be called with a misaligned SP. */
5457 if (IS_STACKALIGN (func_type))
5458 return false;
5460 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5461 references should become a NOP. Don't convert such calls into
5462 sibling calls. */
5463 if (TARGET_AAPCS_BASED
5464 && arm_abi == ARM_ABI_AAPCS
5465 && decl
5466 && DECL_WEAK (decl))
5467 return false;
5469 /* Everything else is ok. */
5470 return true;
5474 /* Addressing mode support functions. */
5476 /* Return nonzero if X is a legitimate immediate operand when compiling
5477 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
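/* E.g. (illustrative): integer constants are acceptable immediates under
   PIC, but a SYMBOL_REF such as the address of a global (or a CONST
   offset from one) is not, since it must first be loaded via the GOT;
   those cases return 0 below.  */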
5479 legitimate_pic_operand_p (rtx x)
5481 if (GET_CODE (x) == SYMBOL_REF
5482 || (GET_CODE (x) == CONST
5483 && GET_CODE (XEXP (x, 0)) == PLUS
5484 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5485 return 0;
5487 return 1;
5490 /* Record that the current function needs a PIC register. Initialize
5491 cfun->machine->pic_reg if we have not already done so. */
5493 static void
5494 require_pic_register (void)
5496 /* A lot of the logic here is made obscure by the fact that this
5497 routine gets called as part of the rtx cost estimation process.
5498 We don't want those calls to affect any assumptions about the real
5499 function; and further, we can't call entry_of_function() until we
5500 start the real expansion process. */
5501 if (!crtl->uses_pic_offset_table)
5503 gcc_assert (can_create_pseudo_p ());
5504 if (arm_pic_register != INVALID_REGNUM
5505 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
5507 if (!cfun->machine->pic_reg)
5508 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5510 /* Play games to avoid marking the function as needing pic
5511 if we are being called as part of the cost-estimation
5512 process. */
5513 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5514 crtl->uses_pic_offset_table = 1;
5516 else
5518 rtx seq, insn;
5520 if (!cfun->machine->pic_reg)
5521 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5523 /* Play games to avoid marking the function as needing pic
5524 if we are being called as part of the cost-estimation
5525 process. */
5526 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5528 crtl->uses_pic_offset_table = 1;
5529 start_sequence ();
5531 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
5532 && arm_pic_register > LAST_LO_REGNUM)
5533 emit_move_insn (cfun->machine->pic_reg,
5534 gen_rtx_REG (Pmode, arm_pic_register));
5535 else
5536 arm_load_pic_register (0UL);
5538 seq = get_insns ();
5539 end_sequence ();
5541 for (insn = seq; insn; insn = NEXT_INSN (insn))
5542 if (INSN_P (insn))
5543 INSN_LOCATION (insn) = prologue_location;
5545 /* We can be called during expansion of PHI nodes, where
5546 we can't yet emit instructions directly in the final
5547 insn stream. Queue the insns on the entry edge, they will
5548 be committed after everything else is expanded. */
5549 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5556 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5558 if (GET_CODE (orig) == SYMBOL_REF
5559 || GET_CODE (orig) == LABEL_REF)
5561 rtx insn;
5563 if (reg == 0)
5565 gcc_assert (can_create_pseudo_p ());
5566 reg = gen_reg_rtx (Pmode);
5569 /* VxWorks does not impose a fixed gap between segments; the run-time
5570 gap can be different from the object-file gap. We therefore can't
5571 use GOTOFF unless we are absolutely sure that the symbol is in the
5572 same segment as the GOT. Unfortunately, the flexibility of linker
5573 scripts means that we can't be sure of that in general, so assume
5574 that GOTOFF is never valid on VxWorks. */
5575 if ((GET_CODE (orig) == LABEL_REF
5576 || (GET_CODE (orig) == SYMBOL_REF &&
5577 SYMBOL_REF_LOCAL_P (orig)))
5578 && NEED_GOT_RELOC
5579 && !TARGET_VXWORKS_RTP)
5580 insn = arm_pic_static_addr (orig, reg);
5581 else
5583 rtx pat;
5584 rtx mem;
5586 /* If this function doesn't have a pic register, create one now. */
5587 require_pic_register ();
5589 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5591 /* Make the MEM as close to a constant as possible. */
5592 mem = SET_SRC (pat);
5593 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5594 MEM_READONLY_P (mem) = 1;
5595 MEM_NOTRAP_P (mem) = 1;
5597 insn = emit_insn (pat);
5600 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5601 by the loop optimizer. */
5602 set_unique_reg_note (insn, REG_EQUAL, orig);
5604 return reg;
5606 else if (GET_CODE (orig) == CONST)
5608 rtx base, offset;
5610 if (GET_CODE (XEXP (orig, 0)) == PLUS
5611 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5612 return orig;
5614 /* Handle the case where we have: const (UNSPEC_TLS). */
5615 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5616 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5617 return orig;
5619 /* Handle the case where we have:
5620 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5621 CONST_INT. */
5622 if (GET_CODE (XEXP (orig, 0)) == PLUS
5623 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5624 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5626 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5627 return orig;
5630 if (reg == 0)
5632 gcc_assert (can_create_pseudo_p ());
5633 reg = gen_reg_rtx (Pmode);
5636 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5638 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5639 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5640 base == reg ? 0 : reg);
5642 if (CONST_INT_P (offset))
5644 /* The base register doesn't really matter, we only want to
5645 test the index for the appropriate mode. */
5646 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5648 gcc_assert (can_create_pseudo_p ());
5649 offset = force_reg (Pmode, offset);
5652 if (CONST_INT_P (offset))
5653 return plus_constant (Pmode, base, INTVAL (offset));
5656 if (GET_MODE_SIZE (mode) > 4
5657 && (GET_MODE_CLASS (mode) == MODE_INT
5658 || TARGET_SOFT_FLOAT))
5660 emit_insn (gen_addsi3 (reg, base, offset));
5661 return reg;
5664 return gen_rtx_PLUS (Pmode, base, offset);
5667 return orig;
5671 /* Find a spare register to use during the prolog of a function. */
5673 static int
5674 thumb_find_work_register (unsigned long pushed_regs_mask)
5676 int reg;
5678 /* Check the argument registers first as these are call-used. The
5679 register allocation order means that sometimes r3 might be used
5680 but earlier argument registers might not, so check them all. */
5681 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5682 if (!df_regs_ever_live_p (reg))
5683 return reg;
5685 /* Before going on to check the call-saved registers we can try a couple
5686 more ways of deducing that r3 is available. The first is when we are
5687 pushing anonymous arguments onto the stack and we have less than 4
5688 registers worth of fixed arguments(*). In this case r3 will be part of
5689 the variable argument list and so we can be sure that it will be
5690 pushed right at the start of the function. Hence it will be available
5691 for the rest of the prologue.
5692 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5693 if (cfun->machine->uses_anonymous_args
5694 && crtl->args.pretend_args_size > 0)
5695 return LAST_ARG_REGNUM;
5697 /* The other case is when we have fixed arguments but less than 4 registers
5698 worth. In this case r3 might be used in the body of the function, but
5699 it is not being used to convey an argument into the function. In theory
5700 we could just check crtl->args.size to see how many bytes are
5701 being passed in argument registers, but it seems that it is unreliable.
5702 Sometimes it will have the value 0 when in fact arguments are being
5703 passed. (See testcase execute/20021111-1.c for an example). So we also
5704 check the args_info.nregs field as well. The problem with this field is
5705 that it makes no allowances for arguments that are passed to the
5706 function but which are not used. Hence we could miss an opportunity
5707 when a function has an unused argument in r3. But it is better to be
5708 safe than to be sorry. */
5709 if (! cfun->machine->uses_anonymous_args
5710 && crtl->args.size >= 0
5711 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5712 && (TARGET_AAPCS_BASED
5713 ? crtl->args.info.aapcs_ncrn < 4
5714 : crtl->args.info.nregs < 4))
5715 return LAST_ARG_REGNUM;
5717 /* Otherwise look for a call-saved register that is going to be pushed. */
5718 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5719 if (pushed_regs_mask & (1 << reg))
5720 return reg;
5722 if (TARGET_THUMB2)
5724 /* Thumb-2 can use high regs. */
5725 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5726 if (pushed_regs_mask & (1 << reg))
5727 return reg;
5729 /* Something went wrong - thumb_compute_save_reg_mask()
5730 should have arranged for a suitable register to be pushed. */
5731 gcc_unreachable ();
5734 static GTY(()) int pic_labelno;
5736 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5737 low register. */
5739 void
5740 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5742 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5744 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5745 return;
5747 gcc_assert (flag_pic);
5749 pic_reg = cfun->machine->pic_reg;
5750 if (TARGET_VXWORKS_RTP)
5752 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5753 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5754 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5756 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5758 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5759 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5761 else
5763 /* We use an UNSPEC rather than a LABEL_REF because this label
5764 never appears in the code stream. */
5766 labelno = GEN_INT (pic_labelno++);
5767 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5768 l1 = gen_rtx_CONST (VOIDmode, l1);
5770 /* On the ARM the PC register contains 'dot + 8' at the time of the
5771 addition, on the Thumb it is 'dot + 4'. */
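/* That is, reading the PC yields the address of the current instruction
   plus 8 in ARM state (plus 4 in Thumb state) because of instruction
   prefetch, so the constant folded in here compensates for the pipeline
   offset when the pc-relative addition is performed at the label.  */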
5772 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5773 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5774 UNSPEC_GOTSYM_OFF);
5775 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5777 if (TARGET_32BIT)
5779 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5781 else /* TARGET_THUMB1 */
5783 if (arm_pic_register != INVALID_REGNUM
5784 && REGNO (pic_reg) > LAST_LO_REGNUM)
5786 /* We will have pushed the pic register, so we should always be
5787 able to find a work register. */
5788 pic_tmp = gen_rtx_REG (SImode,
5789 thumb_find_work_register (saved_regs));
5790 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5791 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5792 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5794 else if (arm_pic_register != INVALID_REGNUM
5795 && arm_pic_register > LAST_LO_REGNUM
5796 && REGNO (pic_reg) <= LAST_LO_REGNUM)
5798 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5799 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
5800 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
5802 else
5803 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5807 /* Need to emit this whether or not we obey regdecls,
5808 since setjmp/longjmp can cause life info to screw up. */
5809 emit_use (pic_reg);
5812 /* Generate code to load the address of a static var when flag_pic is set. */
5813 static rtx
5814 arm_pic_static_addr (rtx orig, rtx reg)
5816 rtx l1, labelno, offset_rtx, insn;
5818 gcc_assert (flag_pic);
5820 /* We use an UNSPEC rather than a LABEL_REF because this label
5821 never appears in the code stream. */
5822 labelno = GEN_INT (pic_labelno++);
5823 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5824 l1 = gen_rtx_CONST (VOIDmode, l1);
5826 /* On the ARM the PC register contains 'dot + 8' at the time of the
5827 addition, on the Thumb it is 'dot + 4'. */
5828 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5829 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5830 UNSPEC_SYMBOL_OFFSET);
5831 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5833 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5834 return insn;
5837 /* Return nonzero if X is valid as an ARM state addressing register. */
5838 static int
5839 arm_address_register_rtx_p (rtx x, int strict_p)
5841 int regno;
5843 if (!REG_P (x))
5844 return 0;
5846 regno = REGNO (x);
5848 if (strict_p)
5849 return ARM_REGNO_OK_FOR_BASE_P (regno);
5851 return (regno <= LAST_ARM_REGNUM
5852 || regno >= FIRST_PSEUDO_REGISTER
5853 || regno == FRAME_POINTER_REGNUM
5854 || regno == ARG_POINTER_REGNUM);
5857 /* Return TRUE if this rtx is the difference of a symbol and a label,
5858 and will reduce to a PC-relative relocation in the object file.
5859 Expressions like this can be left alone when generating PIC, rather
5860 than forced through the GOT. */
5861 static int
5862 pcrel_constant_p (rtx x)
5864 if (GET_CODE (x) == MINUS)
5865 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5867 return FALSE;
5870 /* Return true if X will surely end up in an index register after the next
5871 splitting pass. */
5872 static bool
5873 will_be_in_index_register (const_rtx x)
5875 /* arm.md: calculate_pic_address will split this into a register. */
5876 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5879 /* Return nonzero if X is a valid ARM state address operand. */
5881 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5882 int strict_p)
5884 bool use_ldrd;
5885 enum rtx_code code = GET_CODE (x);
5887 if (arm_address_register_rtx_p (x, strict_p))
5888 return 1;
5890 use_ldrd = (TARGET_LDRD
5891 && (mode == DImode
5892 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5894 if (code == POST_INC || code == PRE_DEC
5895 || ((code == PRE_INC || code == POST_DEC)
5896 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5897 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5899 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5900 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5901 && GET_CODE (XEXP (x, 1)) == PLUS
5902 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5904 rtx addend = XEXP (XEXP (x, 1), 1);
5906 /* Don't allow ldrd post increment by register because it's hard
5907 to fix up invalid register choices. */
5908 if (use_ldrd
5909 && GET_CODE (x) == POST_MODIFY
5910 && REG_P (addend))
5911 return 0;
5913 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5914 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5917 /* After reload constants split into minipools will have addresses
5918 from a LABEL_REF. */
5919 else if (reload_completed
5920 && (code == LABEL_REF
5921 || (code == CONST
5922 && GET_CODE (XEXP (x, 0)) == PLUS
5923 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5924 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5925 return 1;
5927 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5928 return 0;
5930 else if (code == PLUS)
5932 rtx xop0 = XEXP (x, 0);
5933 rtx xop1 = XEXP (x, 1);
5935 return ((arm_address_register_rtx_p (xop0, strict_p)
5936 && ((CONST_INT_P (xop1)
5937 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5938 || (!strict_p && will_be_in_index_register (xop1))))
5939 || (arm_address_register_rtx_p (xop1, strict_p)
5940 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5943 #if 0
5944 /* Reload currently can't handle MINUS, so disable this for now */
5945 else if (GET_CODE (x) == MINUS)
5947 rtx xop0 = XEXP (x, 0);
5948 rtx xop1 = XEXP (x, 1);
5950 return (arm_address_register_rtx_p (xop0, strict_p)
5951 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5953 #endif
5955 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5956 && code == SYMBOL_REF
5957 && CONSTANT_POOL_ADDRESS_P (x)
5958 && ! (flag_pic
5959 && symbol_mentioned_p (get_pool_constant (x))
5960 && ! pcrel_constant_p (get_pool_constant (x))))
5961 return 1;
5963 return 0;
5966 /* Return nonzero if X is a valid Thumb-2 address operand. */
5967 static int
5968 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5970 bool use_ldrd;
5971 enum rtx_code code = GET_CODE (x);
5973 if (arm_address_register_rtx_p (x, strict_p))
5974 return 1;
5976 use_ldrd = (TARGET_LDRD
5977 && (mode == DImode
5978 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5980 if (code == POST_INC || code == PRE_DEC
5981 || ((code == PRE_INC || code == POST_DEC)
5982 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5983 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5985 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5986 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5987 && GET_CODE (XEXP (x, 1)) == PLUS
5988 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5990 /* Thumb-2 only has autoincrement by constant. */
5991 rtx addend = XEXP (XEXP (x, 1), 1);
5992 HOST_WIDE_INT offset;
5994 if (!CONST_INT_P (addend))
5995 return 0;
5997 offset = INTVAL(addend);
5998 if (GET_MODE_SIZE (mode) <= 4)
5999 return (offset > -256 && offset < 256);
6001 return (use_ldrd && offset > -1024 && offset < 1024
6002 && (offset & 3) == 0);
6005 /* After reload constants split into minipools will have addresses
6006 from a LABEL_REF. */
6007 else if (reload_completed
6008 && (code == LABEL_REF
6009 || (code == CONST
6010 && GET_CODE (XEXP (x, 0)) == PLUS
6011 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6012 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6013 return 1;
6015 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6016 return 0;
6018 else if (code == PLUS)
6020 rtx xop0 = XEXP (x, 0);
6021 rtx xop1 = XEXP (x, 1);
6023 return ((arm_address_register_rtx_p (xop0, strict_p)
6024 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6025 || (!strict_p && will_be_in_index_register (xop1))))
6026 || (arm_address_register_rtx_p (xop1, strict_p)
6027 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6030 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6031 && code == SYMBOL_REF
6032 && CONSTANT_POOL_ADDRESS_P (x)
6033 && ! (flag_pic
6034 && symbol_mentioned_p (get_pool_constant (x))
6035 && ! pcrel_constant_p (get_pool_constant (x))))
6036 return 1;
6038 return 0;
6041 /* Return nonzero if INDEX is valid for an address index operand in
6042 ARM state. */
6043 static int
6044 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6045 int strict_p)
6047 HOST_WIDE_INT range;
6048 enum rtx_code code = GET_CODE (index);
6050 /* Standard coprocessor addressing modes. */
6051 if (TARGET_HARD_FLOAT
6052 && TARGET_VFP
6053 && (mode == SFmode || mode == DFmode))
6054 return (code == CONST_INT && INTVAL (index) < 1024
6055 && INTVAL (index) > -1024
6056 && (INTVAL (index) & 3) == 0);
6058 /* For quad modes, we restrict the constant offset to be slightly less
6059 than what the instruction format permits. We do this because for
6060 quad mode moves, we will actually decompose them into two separate
6061 double-mode reads or writes. INDEX must therefore be a valid
6062 (double-mode) offset and so should INDEX+8. */
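/* For example, an offset of 1012 is accepted below: the second doubleword
   access then lands at 1020, the largest valid double-mode offset, whereas
   1016 would only be valid for a single doubleword access.  */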
6063 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6064 return (code == CONST_INT
6065 && INTVAL (index) < 1016
6066 && INTVAL (index) > -1024
6067 && (INTVAL (index) & 3) == 0);
6069 /* We have no such constraint on double mode offsets, so we permit the
6070 full range of the instruction format. */
6071 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6072 return (code == CONST_INT
6073 && INTVAL (index) < 1024
6074 && INTVAL (index) > -1024
6075 && (INTVAL (index) & 3) == 0);
6077 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6078 return (code == CONST_INT
6079 && INTVAL (index) < 1024
6080 && INTVAL (index) > -1024
6081 && (INTVAL (index) & 3) == 0);
6083 if (arm_address_register_rtx_p (index, strict_p)
6084 && (GET_MODE_SIZE (mode) <= 4))
6085 return 1;
6087 if (mode == DImode || mode == DFmode)
6089 if (code == CONST_INT)
6091 HOST_WIDE_INT val = INTVAL (index);
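/* The 4092 limit below keeps the offset of the second word of the
   doubleword access (val + 4) within the 12-bit ldr range when ldrd
   is not available.  */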
6093 if (TARGET_LDRD)
6094 return val > -256 && val < 256;
6095 else
6096 return val > -4096 && val < 4092;
6099 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6102 if (GET_MODE_SIZE (mode) <= 4
6103 && ! (arm_arch4
6104 && (mode == HImode
6105 || mode == HFmode
6106 || (mode == QImode && outer == SIGN_EXTEND))))
6108 if (code == MULT)
6110 rtx xiop0 = XEXP (index, 0);
6111 rtx xiop1 = XEXP (index, 1);
6113 return ((arm_address_register_rtx_p (xiop0, strict_p)
6114 && power_of_two_operand (xiop1, SImode))
6115 || (arm_address_register_rtx_p (xiop1, strict_p)
6116 && power_of_two_operand (xiop0, SImode)));
6118 else if (code == LSHIFTRT || code == ASHIFTRT
6119 || code == ASHIFT || code == ROTATERT)
6121 rtx op = XEXP (index, 1);
6123 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6124 && CONST_INT_P (op)
6125 && INTVAL (op) > 0
6126 && INTVAL (op) <= 31);
6130 /* For ARM v4 we may be doing a sign-extend operation during the
6131 load. */
6132 if (arm_arch4)
6134 if (mode == HImode
6135 || mode == HFmode
6136 || (outer == SIGN_EXTEND && mode == QImode))
6137 range = 256;
6138 else
6139 range = 4096;
6141 else
6142 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6144 return (code == CONST_INT
6145 && INTVAL (index) < range
6146 && INTVAL (index) > -range);
6149 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6150 index operand. i.e. 1, 2, 4 or 8. */
6151 static bool
6152 thumb2_index_mul_operand (rtx op)
6154 HOST_WIDE_INT val;
6156 if (!CONST_INT_P (op))
6157 return false;
6159 val = INTVAL(op);
6160 return (val == 1 || val == 2 || val == 4 || val == 8);
6163 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6164 static int
6165 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6167 enum rtx_code code = GET_CODE (index);
6169 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6170 /* Standard coprocessor addressing modes. */
6171 if (TARGET_HARD_FLOAT
6172 && TARGET_VFP
6173 && (mode == SFmode || mode == DFmode))
6174 return (code == CONST_INT && INTVAL (index) < 1024
6175 /* Thumb-2 allows only > -256 index range for its core register
6176 load/stores. Since we allow SF/DF in core registers, we have
6177 to use the intersection between -256~4096 (core) and -1024~1024
6178 (coprocessor). */
6179 && INTVAL (index) > -256
6180 && (INTVAL (index) & 3) == 0);
6182 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6184 /* For DImode assume values will usually live in core regs
6185 and only allow LDRD addressing modes. */
6186 if (!TARGET_LDRD || mode != DImode)
6187 return (code == CONST_INT
6188 && INTVAL (index) < 1024
6189 && INTVAL (index) > -1024
6190 && (INTVAL (index) & 3) == 0);
6193 /* For quad modes, we restrict the constant offset to be slightly less
6194 than what the instruction format permits. We do this because for
6195 quad mode moves, we will actually decompose them into two separate
6196 double-mode reads or writes. INDEX must therefore be a valid
6197 (double-mode) offset and so should INDEX+8. */
6198 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6199 return (code == CONST_INT
6200 && INTVAL (index) < 1016
6201 && INTVAL (index) > -1024
6202 && (INTVAL (index) & 3) == 0);
6204 /* We have no such constraint on double mode offsets, so we permit the
6205 full range of the instruction format. */
6206 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6207 return (code == CONST_INT
6208 && INTVAL (index) < 1024
6209 && INTVAL (index) > -1024
6210 && (INTVAL (index) & 3) == 0);
6212 if (arm_address_register_rtx_p (index, strict_p)
6213 && (GET_MODE_SIZE (mode) <= 4))
6214 return 1;
6216 if (mode == DImode || mode == DFmode)
6218 if (code == CONST_INT)
6220 HOST_WIDE_INT val = INTVAL (index);
6221 /* ??? Can we assume ldrd for thumb2? */
6222 /* Thumb-2 ldrd only has reg+const addressing modes. */
6223 /* ldrd supports offsets of +-1020.
6224 However the ldr fallback does not. */
6225 return val > -256 && val < 256 && (val & 3) == 0;
6227 else
6228 return 0;
6231 if (code == MULT)
6233 rtx xiop0 = XEXP (index, 0);
6234 rtx xiop1 = XEXP (index, 1);
6236 return ((arm_address_register_rtx_p (xiop0, strict_p)
6237 && thumb2_index_mul_operand (xiop1))
6238 || (arm_address_register_rtx_p (xiop1, strict_p)
6239 && thumb2_index_mul_operand (xiop0)));
6241 else if (code == ASHIFT)
6243 rtx op = XEXP (index, 1);
6245 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6246 && CONST_INT_P (op)
6247 && INTVAL (op) > 0
6248 && INTVAL (op) <= 3);
6251 return (code == CONST_INT
6252 && INTVAL (index) < 4096
6253 && INTVAL (index) > -256);
6256 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6257 static int
6258 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6260 int regno;
6262 if (!REG_P (x))
6263 return 0;
6265 regno = REGNO (x);
6267 if (strict_p)
6268 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6270 return (regno <= LAST_LO_REGNUM
6271 || regno > LAST_VIRTUAL_REGISTER
6272 || regno == FRAME_POINTER_REGNUM
6273 || (GET_MODE_SIZE (mode) >= 4
6274 && (regno == STACK_POINTER_REGNUM
6275 || regno >= FIRST_PSEUDO_REGISTER
6276 || x == hard_frame_pointer_rtx
6277 || x == arg_pointer_rtx)));
6280 /* Return nonzero if x is a legitimate index register. This is the case
6281 for any base register that can access a QImode object. */
6282 inline static int
6283 thumb1_index_register_rtx_p (rtx x, int strict_p)
6285 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6288 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6290 The AP may be eliminated to either the SP or the FP, so we use the
6291 least common denominator, e.g. SImode, and offsets from 0 to 64.
6293 ??? Verify whether the above is the right approach.
6295 ??? Also, the FP may be eliminated to the SP, so perhaps that
6296 needs special handling also.
6298 ??? Look at how the mips16 port solves this problem. It probably uses
6299 better ways to solve some of these problems.
6301 Although it is not incorrect, we don't accept QImode and HImode
6302 addresses based on the frame pointer or arg pointer until the
6303 reload pass starts. This is so that eliminating such addresses
6304 into stack based ones won't produce impossible code. */
6306 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6308 /* ??? Not clear if this is right. Experiment. */
6309 if (GET_MODE_SIZE (mode) < 4
6310 && !(reload_in_progress || reload_completed)
6311 && (reg_mentioned_p (frame_pointer_rtx, x)
6312 || reg_mentioned_p (arg_pointer_rtx, x)
6313 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6314 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6315 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6316 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6317 return 0;
6319 /* Accept any base register. SP only in SImode or larger. */
6320 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6321 return 1;
6323 /* This is PC relative data before arm_reorg runs. */
6324 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6325 && GET_CODE (x) == SYMBOL_REF
6326 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6327 return 1;
6329 /* This is PC relative data after arm_reorg runs. */
6330 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6331 && reload_completed
6332 && (GET_CODE (x) == LABEL_REF
6333 || (GET_CODE (x) == CONST
6334 && GET_CODE (XEXP (x, 0)) == PLUS
6335 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6336 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6337 return 1;
6339 /* Post-inc indexing only supported for SImode and larger. */
6340 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6341 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6342 return 1;
6344 else if (GET_CODE (x) == PLUS)
6346 /* REG+REG address can be any two index registers. */
6347 /* We disallow FRAME+REG addressing since we know that FRAME
6348 will be replaced with STACK, and SP relative addressing only
6349 permits SP+OFFSET. */
6350 if (GET_MODE_SIZE (mode) <= 4
6351 && XEXP (x, 0) != frame_pointer_rtx
6352 && XEXP (x, 1) != frame_pointer_rtx
6353 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6354 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6355 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6356 return 1;
6358 /* REG+const has 5-7 bit offset for non-SP registers. */
6359 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6360 || XEXP (x, 0) == arg_pointer_rtx)
6361 && CONST_INT_P (XEXP (x, 1))
6362 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6363 return 1;
6365 /* REG+const has 10-bit offset for SP, but only SImode and
6366 larger are supported. */
6367 /* ??? Should probably check for DI/DFmode overflow here
6368 just like GO_IF_LEGITIMATE_OFFSET does. */
6369 else if (REG_P (XEXP (x, 0))
6370 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6371 && GET_MODE_SIZE (mode) >= 4
6372 && CONST_INT_P (XEXP (x, 1))
6373 && INTVAL (XEXP (x, 1)) >= 0
6374 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6375 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6376 return 1;
6378 else if (REG_P (XEXP (x, 0))
6379 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6380 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6381 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6382 && REGNO (XEXP (x, 0))
6383 <= LAST_VIRTUAL_POINTER_REGISTER))
6384 && GET_MODE_SIZE (mode) >= 4
6385 && CONST_INT_P (XEXP (x, 1))
6386 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6387 return 1;
6390 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6391 && GET_MODE_SIZE (mode) == 4
6392 && GET_CODE (x) == SYMBOL_REF
6393 && CONSTANT_POOL_ADDRESS_P (x)
6394 && ! (flag_pic
6395 && symbol_mentioned_p (get_pool_constant (x))
6396 && ! pcrel_constant_p (get_pool_constant (x))))
6397 return 1;
6399 return 0;
6402 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6403 instruction of mode MODE. */
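/* For example, a QImode access allows offsets 0..31, an HImode access
   allows even offsets 0..62, and an SImode or wider access allows
   multiples of four from 0 up to 128 minus the access size.  */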
6405 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6407 switch (GET_MODE_SIZE (mode))
6409 case 1:
6410 return val >= 0 && val < 32;
6412 case 2:
6413 return val >= 0 && val < 64 && (val & 1) == 0;
6415 default:
6416 return (val >= 0
6417 && (val + GET_MODE_SIZE (mode)) <= 128
6418 && (val & 3) == 0);
6422 bool
6423 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6425 if (TARGET_ARM)
6426 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6427 else if (TARGET_THUMB2)
6428 return thumb2_legitimate_address_p (mode, x, strict_p);
6429 else /* if (TARGET_THUMB1) */
6430 return thumb1_legitimate_address_p (mode, x, strict_p);
6433 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6435 Given an rtx X being reloaded into a reg required to be
6436 in class CLASS, return the class of reg to actually use.
6437 In general this is just CLASS, but for the Thumb core registers and
6438 immediate constants we prefer a LO_REGS class or a subset. */
6440 static reg_class_t
6441 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6443 if (TARGET_32BIT)
6444 return rclass;
6445 else
6447 if (rclass == GENERAL_REGS
6448 || rclass == HI_REGS
6449 || rclass == NO_REGS
6450 || rclass == STACK_REG)
6451 return LO_REGS;
6452 else
6453 return rclass;
6457 /* Build the SYMBOL_REF for __tls_get_addr. */
6459 static GTY(()) rtx tls_get_addr_libfunc;
6461 static rtx
6462 get_tls_get_addr (void)
6464 if (!tls_get_addr_libfunc)
6465 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6466 return tls_get_addr_libfunc;
6470 arm_load_tp (rtx target)
6472 if (!target)
6473 target = gen_reg_rtx (SImode);
6475 if (TARGET_HARD_TP)
6477 /* Can return in any reg. */
6478 emit_insn (gen_load_tp_hard (target));
6480 else
6482 /* Always returned in r0. Immediately copy the result into a pseudo,
6483 otherwise other uses of r0 (e.g. setting up function arguments) may
6484 clobber the value. */
6486 rtx tmp;
6488 emit_insn (gen_load_tp_soft ());
6490 tmp = gen_rtx_REG (SImode, 0);
6491 emit_move_insn (target, tmp);
6493 return target;
6496 static rtx
6497 load_tls_operand (rtx x, rtx reg)
6499 rtx tmp;
6501 if (reg == NULL_RTX)
6502 reg = gen_reg_rtx (SImode);
6504 tmp = gen_rtx_CONST (SImode, x);
6506 emit_move_insn (reg, tmp);
6508 return reg;
6511 static rtx
6512 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6514 rtx insns, label, labelno, sum;
6516 gcc_assert (reloc != TLS_DESCSEQ);
6517 start_sequence ();
6519 labelno = GEN_INT (pic_labelno++);
6520 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6521 label = gen_rtx_CONST (VOIDmode, label);
6523 sum = gen_rtx_UNSPEC (Pmode,
6524 gen_rtvec (4, x, GEN_INT (reloc), label,
6525 GEN_INT (TARGET_ARM ? 8 : 4)),
6526 UNSPEC_TLS);
6527 reg = load_tls_operand (sum, reg);
6529 if (TARGET_ARM)
6530 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6531 else
6532 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6534 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6535 LCT_PURE, /* LCT_CONST? */
6536 Pmode, 1, reg, Pmode);
6538 insns = get_insns ();
6539 end_sequence ();
6541 return insns;
6544 static rtx
6545 arm_tls_descseq_addr (rtx x, rtx reg)
6547 rtx labelno = GEN_INT (pic_labelno++);
6548 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6549 rtx sum = gen_rtx_UNSPEC (Pmode,
6550 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6551 gen_rtx_CONST (VOIDmode, label),
6552 GEN_INT (!TARGET_ARM)),
6553 UNSPEC_TLS);
6554 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6556 emit_insn (gen_tlscall (x, labelno));
6557 if (!reg)
6558 reg = gen_reg_rtx (SImode);
6559 else
6560 gcc_assert (REGNO (reg) != 0);
6562 emit_move_insn (reg, reg0);
6564 return reg;
6568 legitimize_tls_address (rtx x, rtx reg)
6570 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6571 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6573 switch (model)
6575 case TLS_MODEL_GLOBAL_DYNAMIC:
6576 if (TARGET_GNU2_TLS)
6578 reg = arm_tls_descseq_addr (x, reg);
6580 tp = arm_load_tp (NULL_RTX);
6582 dest = gen_rtx_PLUS (Pmode, tp, reg);
6584 else
6586 /* Original scheme */
6587 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6588 dest = gen_reg_rtx (Pmode);
6589 emit_libcall_block (insns, dest, ret, x);
6591 return dest;
6593 case TLS_MODEL_LOCAL_DYNAMIC:
6594 if (TARGET_GNU2_TLS)
6596 reg = arm_tls_descseq_addr (x, reg);
6598 tp = arm_load_tp (NULL_RTX);
6600 dest = gen_rtx_PLUS (Pmode, tp, reg);
6602 else
6604 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6606 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6607 share the LDM result with other LD model accesses. */
6608 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6609 UNSPEC_TLS);
6610 dest = gen_reg_rtx (Pmode);
6611 emit_libcall_block (insns, dest, ret, eqv);
6613 /* Load the addend. */
6614 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6615 GEN_INT (TLS_LDO32)),
6616 UNSPEC_TLS);
6617 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6618 dest = gen_rtx_PLUS (Pmode, dest, addend);
6620 return dest;
6622 case TLS_MODEL_INITIAL_EXEC:
6623 labelno = GEN_INT (pic_labelno++);
6624 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6625 label = gen_rtx_CONST (VOIDmode, label);
6626 sum = gen_rtx_UNSPEC (Pmode,
6627 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6628 GEN_INT (TARGET_ARM ? 8 : 4)),
6629 UNSPEC_TLS);
6630 reg = load_tls_operand (sum, reg);
6632 if (TARGET_ARM)
6633 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6634 else if (TARGET_THUMB2)
6635 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6636 else
6638 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6639 emit_move_insn (reg, gen_const_mem (SImode, reg));
6642 tp = arm_load_tp (NULL_RTX);
6644 return gen_rtx_PLUS (Pmode, tp, reg);
6646 case TLS_MODEL_LOCAL_EXEC:
6647 tp = arm_load_tp (NULL_RTX);
6649 reg = gen_rtx_UNSPEC (Pmode,
6650 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6651 UNSPEC_TLS);
6652 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6654 return gen_rtx_PLUS (Pmode, tp, reg);
6656 default:
6657 abort ();
6661 /* Try machine-dependent ways of modifying an illegitimate address
6662 to be legitimate. If we find one, return the new, valid address. */
6664 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6666 if (arm_tls_referenced_p (x))
6668 rtx addend = NULL;
6670 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
6672 addend = XEXP (XEXP (x, 0), 1);
6673 x = XEXP (XEXP (x, 0), 0);
6676 if (GET_CODE (x) != SYMBOL_REF)
6677 return x;
6679 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
6681 x = legitimize_tls_address (x, NULL_RTX);
6683 if (addend)
6685 x = gen_rtx_PLUS (SImode, x, addend);
6686 orig_x = x;
6688 else
6689 return x;
6692 if (!TARGET_ARM)
6694 /* TODO: legitimize_address for Thumb2. */
6695 if (TARGET_THUMB2)
6696 return x;
6697 return thumb_legitimize_address (x, orig_x, mode);
6700 if (GET_CODE (x) == PLUS)
6702 rtx xop0 = XEXP (x, 0);
6703 rtx xop1 = XEXP (x, 1);
6705 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6706 xop0 = force_reg (SImode, xop0);
6708 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6709 xop1 = force_reg (SImode, xop1);
6711 if (ARM_BASE_REGISTER_RTX_P (xop0)
6712 && CONST_INT_P (xop1))
6714 HOST_WIDE_INT n, low_n;
6715 rtx base_reg, val;
6716 n = INTVAL (xop1);
6718 /* VFP addressing modes actually allow greater offsets, but for
6719 now we just stick with the lowest common denominator. */
6720 if (mode == DImode
6721 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6723 low_n = n & 0x0f;
6724 n &= ~0x0f;
6725 if (low_n > 4)
6727 n += 16;
6728 low_n -= 16;
6731 else
6733 low_n = ((mode) == TImode ? 0
6734 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6735 n -= low_n;
6738 base_reg = gen_reg_rtx (SImode);
6739 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6740 emit_move_insn (base_reg, val);
6741 x = plus_constant (Pmode, base_reg, low_n);
6743 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6744 x = gen_rtx_PLUS (SImode, xop0, xop1);
6747 /* XXX We don't allow MINUS any more -- see comment in
6748 arm_legitimate_address_outer_p (). */
6749 else if (GET_CODE (x) == MINUS)
6751 rtx xop0 = XEXP (x, 0);
6752 rtx xop1 = XEXP (x, 1);
6754 if (CONSTANT_P (xop0))
6755 xop0 = force_reg (SImode, xop0);
6757 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6758 xop1 = force_reg (SImode, xop1);
6760 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6761 x = gen_rtx_MINUS (SImode, xop0, xop1);
6764 /* Make sure to take full advantage of the pre-indexed addressing mode
6765 with absolute addresses which often allows for the base register to
6766 be factorized for multiple adjacent memory references, and it might
6767 even allow for the mini pool to be avoided entirely. */
6768 else if (CONST_INT_P (x) && optimize > 0)
6770 unsigned int bits;
6771 HOST_WIDE_INT mask, base, index;
6772 rtx base_reg;
6774 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6775 use an 8-bit index. So let's use a 12-bit index for SImode only and
6776 hope that arm_gen_constant will enable ldrb to use more bits. */
6777 bits = (mode == SImode) ? 12 : 8;
6778 mask = (1 << bits) - 1;
6779 base = INTVAL (x) & ~mask;
6780 index = INTVAL (x) & mask;
6781 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6783 /* It'll most probably be more efficient to generate the base
6784 with more bits set and use a negative index instead. */
6785 base |= mask;
6786 index -= mask;
6788 base_reg = force_reg (SImode, GEN_INT (base));
6789 x = plus_constant (Pmode, base_reg, index);
6792 if (flag_pic)
6794 /* We need to find and carefully transform any SYMBOL and LABEL
6795 references; so go back to the original address expression. */
6796 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6798 if (new_x != orig_x)
6799 x = new_x;
6802 return x;
6806 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6807 to be legitimate. If we find one, return the new, valid address. */
6809 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6811 if (GET_CODE (x) == PLUS
6812 && CONST_INT_P (XEXP (x, 1))
6813 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6814 || INTVAL (XEXP (x, 1)) < 0))
6816 rtx xop0 = XEXP (x, 0);
6817 rtx xop1 = XEXP (x, 1);
6818 HOST_WIDE_INT offset = INTVAL (xop1);
6820 /* Try and fold the offset into a biasing of the base register and
6821 then offsetting that. Don't do this when optimizing for space
6822 since it can cause too many CSEs. */
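/* For example, with an SImode access at offset 300 the code below biases
   the base register by 252 and rewrites the access to use the remaining
   in-range offset of 48.  */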
6823 if (optimize_size && offset >= 0
6824 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6826 HOST_WIDE_INT delta;
6828 if (offset >= 256)
6829 delta = offset - (256 - GET_MODE_SIZE (mode));
6830 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6831 delta = 31 * GET_MODE_SIZE (mode);
6832 else
6833 delta = offset & (~31 * GET_MODE_SIZE (mode));
6835 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6836 NULL_RTX);
6837 x = plus_constant (Pmode, xop0, delta);
6839 else if (offset < 0 && offset > -256)
6840 /* Small negative offsets are best done with a subtract before the
6841 dereference; forcing these into a register normally takes two
6842 instructions. */
6843 x = force_operand (x, NULL_RTX);
6844 else
6846 /* For the remaining cases, force the constant into a register. */
6847 xop1 = force_reg (SImode, xop1);
6848 x = gen_rtx_PLUS (SImode, xop0, xop1);
6851 else if (GET_CODE (x) == PLUS
6852 && s_register_operand (XEXP (x, 1), SImode)
6853 && !s_register_operand (XEXP (x, 0), SImode))
6855 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6857 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6860 if (flag_pic)
6862 /* We need to find and carefully transform any SYMBOL and LABEL
6863 references; so go back to the original address expression. */
6864 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6866 if (new_x != orig_x)
6867 x = new_x;
6870 return x;
6873 bool
6874 arm_legitimize_reload_address (rtx *p,
6875 enum machine_mode mode,
6876 int opnum, int type,
6877 int ind_levels ATTRIBUTE_UNUSED)
6879 /* We must recognize output that we have already generated ourselves. */
6880 if (GET_CODE (*p) == PLUS
6881 && GET_CODE (XEXP (*p, 0)) == PLUS
6882 && REG_P (XEXP (XEXP (*p, 0), 0))
6883 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6884 && CONST_INT_P (XEXP (*p, 1)))
6886 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6887 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6888 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6889 return true;
6892 if (GET_CODE (*p) == PLUS
6893 && REG_P (XEXP (*p, 0))
6894 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6895 /* If the base register is equivalent to a constant, let the generic
6896 code handle it. Otherwise we will run into problems if a future
6897 reload pass decides to rematerialize the constant. */
6898 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6899 && CONST_INT_P (XEXP (*p, 1)))
6901 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6902 HOST_WIDE_INT low, high;
6904 /* Detect coprocessor load/stores. */
6905 bool coproc_p = ((TARGET_HARD_FLOAT
6906 && TARGET_VFP
6907 && (mode == SFmode || mode == DFmode))
6908 || (TARGET_REALLY_IWMMXT
6909 && VALID_IWMMXT_REG_MODE (mode))
6910 || (TARGET_NEON
6911 && (VALID_NEON_DREG_MODE (mode)
6912 || VALID_NEON_QREG_MODE (mode))));
6914 /* For certain kinds of access, bail out when the low two bits of the offset are nonzero. */
6915 if ((val & 0x3) != 0
6916 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6917 && (coproc_p
6918 /* For DI, and DF under soft-float: */
6919 || ((mode == DImode || mode == DFmode)
6920 /* Without ldrd, we use stm/ldm, which does not
6921 fare well with unaligned bits. */
6922 && (! TARGET_LDRD
6923 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6924 || TARGET_THUMB2))))
6925 return false;
6927 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6928 where the (reg+high) part gets turned into a reload add insn,
6929 we try to decompose the index into high/low values that can often
6930 also lead to better reload CSE.
6931 For example:
6932 ldr r0, [r2, #4100] // Offset too large
6933 ldr r1, [r2, #4104] // Offset too large
6935 is best reloaded as:
6936 add t1, r2, #4096
6937 ldr r0, [t1, #4]
6938 add t2, r2, #4096
6939 ldr r1, [t2, #8]
6941 which post-reload CSE can simplify in most cases to eliminate the
6942 second add instruction:
6943 add t1, r2, #4096
6944 ldr r0, [t1, #4]
6945 ldr r1, [t1, #8]
6947 The idea here is that we want to split out the bits of the constant
6948 as a mask, rather than as subtracting the maximum offset that the
6949 respective type of load/store used can handle.
6951 A negative low part can still be used even if the overall offset is
6952 positive; sometimes this leads to an immediate that can be constructed
6953 with fewer instructions.
6954 For example:
6955 ldr r0, [r2, #0x3FFFFC]
6957 This is best reloaded as:
6958 add t1, r2, #0x400000
6959 ldr r0, [t1, #-4]
6961 The trick for spotting this for a load insn with N bits of offset
6962 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6963 negative offset that is going to make bit N and all the bits below
6964 it become zero in the remainder part.
6966 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6967 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6968 used in most cases of ARM load/store instructions. */
6970 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6971 (((VAL) & ((1 << (N)) - 1)) \
6972 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6973 : 0)
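/* For example, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) yields -4: bit 12 of
   the value is set, so the low part becomes 0xFFC - 0x1000 = -4 and the
   corresponding high part is 0x400000, matching the reload shown in the
   comment above.  */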
6975 if (coproc_p)
6977 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6979 /* NEON quad-word load/stores are made of two double-word accesses,
6980 so the valid index range is reduced by 8. Treat as 9-bit range if
6981 we go over it. */
6982 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6983 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6985 else if (GET_MODE_SIZE (mode) == 8)
6987 if (TARGET_LDRD)
6988 low = (TARGET_THUMB2
6989 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6990 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6991 else
6992 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6993 to access doublewords. The supported load/store offsets are
6994 -8, -4, and 4, which we try to produce here. */
6995 low = ((val & 0xf) ^ 0x8) - 0x8;
6997 else if (GET_MODE_SIZE (mode) < 8)
6999 /* NEON element load/stores do not have an offset. */
7000 if (TARGET_NEON_FP16 && mode == HFmode)
7001 return false;
7003 if (TARGET_THUMB2)
7005 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7006 Try the wider 12-bit range first, and re-try if the result
7007 is out of range. */
7008 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7009 if (low < -255)
7010 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7012 else
7014 if (mode == HImode || mode == HFmode)
7016 if (arm_arch4)
7017 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7018 else
7020 /* The storehi/movhi_bytes fallbacks can use only
7021 [-4094,+4094] of the full ldrb/strb index range. */
7022 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7023 if (low == 4095 || low == -4095)
7024 return false;
7027 else
7028 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7031 else
7032 return false;
7034 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7035 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7036 - (unsigned HOST_WIDE_INT) 0x80000000);
7037 /* Check for overflow or zero */
7038 if (low == 0 || high == 0 || (high + low != val))
7039 return false;
7041 /* Reload the high part into a base reg; leave the low part
7042 in the mem. */
7043 *p = gen_rtx_PLUS (GET_MODE (*p),
7044 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7045 GEN_INT (high)),
7046 GEN_INT (low));
7047 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7048 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7049 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7050 return true;
7053 return false;
7057 thumb_legitimize_reload_address (rtx *x_p,
7058 enum machine_mode mode,
7059 int opnum, int type,
7060 int ind_levels ATTRIBUTE_UNUSED)
7062 rtx x = *x_p;
7064 if (GET_CODE (x) == PLUS
7065 && GET_MODE_SIZE (mode) < 4
7066 && REG_P (XEXP (x, 0))
7067 && XEXP (x, 0) == stack_pointer_rtx
7068 && CONST_INT_P (XEXP (x, 1))
7069 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7071 rtx orig_x = x;
7073 x = copy_rtx (x);
7074 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7075 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7076 return x;
7079 /* If both registers are hi-regs, then it's better to reload the
7080 entire expression rather than each register individually. That
7081 only requires one reload register rather than two. */
7082 if (GET_CODE (x) == PLUS
7083 && REG_P (XEXP (x, 0))
7084 && REG_P (XEXP (x, 1))
7085 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7086 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7088 rtx orig_x = x;
7090 x = copy_rtx (x);
7091 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7092 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7093 return x;
7096 return NULL;
7099 /* Test for various thread-local symbols. */
7101 /* Helper for arm_tls_referenced_p. */
7103 static int
7104 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7106 if (GET_CODE (*x) == SYMBOL_REF)
7107 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7109 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7110 TLS offsets, not real symbol references. */
7111 if (GET_CODE (*x) == UNSPEC
7112 && XINT (*x, 1) == UNSPEC_TLS)
7113 return -1;
7115 return 0;
7118 /* Return TRUE if X contains any TLS symbol references. */
7120 bool
7121 arm_tls_referenced_p (rtx x)
7123 if (! TARGET_HAVE_TLS)
7124 return false;
7126 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7129 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7131 On the ARM, allow any integer (invalid ones are removed later by insn
7132 patterns), nice doubles and symbol_refs which refer to the function's
7133 constant pool XXX.
7135 When generating pic allow anything. */
7137 static bool
7138 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7140 /* At present, we have no support for Neon structure constants, so forbid
7141 them here. It might be possible to handle simple cases like 0 and -1
7142 in future. */
7143 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7144 return false;
7146 return flag_pic || !label_mentioned_p (x);
7149 static bool
7150 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7152 return (CONST_INT_P (x)
7153 || CONST_DOUBLE_P (x)
7154 || CONSTANT_ADDRESS_P (x)
7155 || flag_pic);
7158 static bool
7159 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7161 return (!arm_cannot_force_const_mem (mode, x)
7162 && (TARGET_32BIT
7163 ? arm_legitimate_constant_p_1 (mode, x)
7164 : thumb_legitimate_constant_p (mode, x)));
7167 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7169 static bool
7170 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7172 rtx base, offset;
7174 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7176 split_const (x, &base, &offset);
7177 if (GET_CODE (base) == SYMBOL_REF
7178 && !offset_within_block_p (base, INTVAL (offset)))
7179 return true;
7181 return arm_tls_referenced_p (x);
7184 #define REG_OR_SUBREG_REG(X) \
7185 (REG_P (X) \
7186 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7188 #define REG_OR_SUBREG_RTX(X) \
7189 (REG_P (X) ? (X) : SUBREG_REG (X))
7191 static inline int
7192 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7194 enum machine_mode mode = GET_MODE (x);
7195 int total, words;
7197 switch (code)
7199 case ASHIFT:
7200 case ASHIFTRT:
7201 case LSHIFTRT:
7202 case ROTATERT:
7203 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7205 case PLUS:
7206 case MINUS:
7207 case COMPARE:
7208 case NEG:
7209 case NOT:
7210 return COSTS_N_INSNS (1);
7212 case MULT:
7213 if (CONST_INT_P (XEXP (x, 1)))
7215 int cycles = 0;
7216 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
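/* The loop below estimates one cycle per two bits of the constant
   multiplier; e.g. a multiply by 100 (0x64) is costed as
   COSTS_N_INSNS (2) plus 4 cycles.  */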
7218 while (i)
7220 i >>= 2;
7221 cycles++;
7223 return COSTS_N_INSNS (2) + cycles;
7225 return COSTS_N_INSNS (1) + 16;
7227 case SET:
7228 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7229 the mode. */
7230 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7231 return (COSTS_N_INSNS (words)
7232 + 4 * ((MEM_P (SET_SRC (x)))
7233 + MEM_P (SET_DEST (x))));
7235 case CONST_INT:
7236 if (outer == SET)
7238 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7239 return 0;
7240 if (thumb_shiftable_const (INTVAL (x)))
7241 return COSTS_N_INSNS (2);
7242 return COSTS_N_INSNS (3);
7244 else if ((outer == PLUS || outer == COMPARE)
7245 && INTVAL (x) < 256 && INTVAL (x) > -256)
7246 return 0;
7247 else if ((outer == IOR || outer == XOR || outer == AND)
7248 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7249 return COSTS_N_INSNS (1);
7250 else if (outer == AND)
7252 int i;
7253 /* This duplicates the tests in the andsi3 expander. */
7254 for (i = 9; i <= 31; i++)
7255 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7256 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7257 return COSTS_N_INSNS (2);
7259 else if (outer == ASHIFT || outer == ASHIFTRT
7260 || outer == LSHIFTRT)
7261 return 0;
7262 return COSTS_N_INSNS (2);
7264 case CONST:
7265 case CONST_DOUBLE:
7266 case LABEL_REF:
7267 case SYMBOL_REF:
7268 return COSTS_N_INSNS (3);
7270 case UDIV:
7271 case UMOD:
7272 case DIV:
7273 case MOD:
7274 return 100;
7276 case TRUNCATE:
7277 return 99;
7279 case AND:
7280 case XOR:
7281 case IOR:
7282 /* XXX guess. */
7283 return 8;
7285 case MEM:
7286 /* XXX another guess. */
7287 /* Memory costs quite a lot for the first word, but subsequent words
7288 load at the equivalent of a single insn each. */
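/* For example, an SImode load is costed at 10 and a DImode load at 14,
   with an extra 4 for a constant-pool reference.  */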
7289 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7290 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7291 ? 4 : 0));
7293 case IF_THEN_ELSE:
7294 /* XXX a guess. */
7295 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7296 return 14;
7297 return 2;
7299 case SIGN_EXTEND:
7300 case ZERO_EXTEND:
7301 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7302 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7304 if (mode == SImode)
7305 return total;
7307 if (arm_arch6)
7308 return total + COSTS_N_INSNS (1);
7310 /* Assume a two-shift sequence. Increase the cost slightly so
7311 we prefer actual shifts over an extend operation. */
7312 return total + 1 + COSTS_N_INSNS (2);
7314 default:
7315 return 99;
7319 static inline bool
7320 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7322 enum machine_mode mode = GET_MODE (x);
7323 enum rtx_code subcode;
7324 rtx operand;
7325 enum rtx_code code = GET_CODE (x);
7326 *total = 0;
7328 switch (code)
7330 case MEM:
7331 /* Memory costs quite a lot for the first word, but subsequent words
7332 load at the equivalent of a single insn each. */
7333 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7334 return true;
7336 case DIV:
7337 case MOD:
7338 case UDIV:
7339 case UMOD:
7340 if (TARGET_HARD_FLOAT && mode == SFmode)
7341 *total = COSTS_N_INSNS (2);
7342 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7343 *total = COSTS_N_INSNS (4);
7344 else
7345 *total = COSTS_N_INSNS (20);
7346 return false;
7348 case ROTATE:
7349 if (REG_P (XEXP (x, 1)))
7350 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7351 else if (!CONST_INT_P (XEXP (x, 1)))
7352 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7354 /* Fall through */
7355 case ROTATERT:
7356 if (mode != SImode)
7358 *total += COSTS_N_INSNS (4);
7359 return true;
7362 /* Fall through */
7363 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7364 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7365 if (mode == DImode)
7367 *total += COSTS_N_INSNS (3);
7368 return true;
7371 *total += COSTS_N_INSNS (1);
7372 /* Increase the cost of complex shifts because they aren't any faster,
7373 and reduce dual issue opportunities. */
7374 if (arm_tune_cortex_a9
7375 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7376 ++*total;
7378 return true;
7380 case MINUS:
7381 if (mode == DImode)
7383 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7384 if (CONST_INT_P (XEXP (x, 0))
7385 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7387 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7388 return true;
7391 if (CONST_INT_P (XEXP (x, 1))
7392 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7394 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7395 return true;
7398 return false;
7401 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7403 if (TARGET_HARD_FLOAT
7404 && (mode == SFmode
7405 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7407 *total = COSTS_N_INSNS (1);
7408 if (CONST_DOUBLE_P (XEXP (x, 0))
7409 && arm_const_double_rtx (XEXP (x, 0)))
7411 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7412 return true;
7415 if (CONST_DOUBLE_P (XEXP (x, 1))
7416 && arm_const_double_rtx (XEXP (x, 1)))
7418 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7419 return true;
7422 return false;
7424 *total = COSTS_N_INSNS (20);
7425 return false;
7428 *total = COSTS_N_INSNS (1);
7429 if (CONST_INT_P (XEXP (x, 0))
7430 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7432 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7433 return true;
7436 subcode = GET_CODE (XEXP (x, 1));
7437 if (subcode == ASHIFT || subcode == ASHIFTRT
7438 || subcode == LSHIFTRT
7439 || subcode == ROTATE || subcode == ROTATERT)
7441 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7442 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7443 return true;
7446 /* A shift as a part of RSB costs no more than RSB itself. */
7447 if (GET_CODE (XEXP (x, 0)) == MULT
7448 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7450 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7451 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7452 return true;
7455 if (subcode == MULT
7456 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7458 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7459 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7460 return true;
7463 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7464 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7466 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7467 if (REG_P (XEXP (XEXP (x, 1), 0))
7468 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7469 *total += COSTS_N_INSNS (1);
7471 return true;
7474 /* Fall through */
7476 case PLUS:
7477 if (code == PLUS && arm_arch6 && mode == SImode
7478 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7479 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7481 *total = COSTS_N_INSNS (1);
7482 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7483 0, speed);
7484 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7485 return true;
7488 /* MLA: All arguments must be registers. We filter out
7489 	 multiplication by a power of two, so that we fall through to
7490 the code below. */
7491 if (GET_CODE (XEXP (x, 0)) == MULT
7492 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7494 /* The cost comes from the cost of the multiply. */
7495 return false;
7498 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7500 if (TARGET_HARD_FLOAT
7501 && (mode == SFmode
7502 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7504 *total = COSTS_N_INSNS (1);
7505 if (CONST_DOUBLE_P (XEXP (x, 1))
7506 && arm_const_double_rtx (XEXP (x, 1)))
7508 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7509 return true;
7512 return false;
7515 *total = COSTS_N_INSNS (20);
7516 return false;
7519 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7520 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7522 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7523 if (REG_P (XEXP (XEXP (x, 0), 0))
7524 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7525 *total += COSTS_N_INSNS (1);
7526 return true;
7529 /* Fall through */
7531 case AND: case XOR: case IOR:
7533 	/* Normally the frame registers will be split into reg+const during
7534 reload, so it is a bad idea to combine them with other instructions,
7535 since then they might not be moved outside of loops. As a compromise
7536 we allow integration with ops that have a constant as their second
7537 operand. */
7538 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7539 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7540 && !CONST_INT_P (XEXP (x, 1)))
7541 *total = COSTS_N_INSNS (1);
7543 if (mode == DImode)
7545 *total += COSTS_N_INSNS (2);
7546 if (CONST_INT_P (XEXP (x, 1))
7547 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7549 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7550 return true;
7553 return false;
7556 *total += COSTS_N_INSNS (1);
7557 if (CONST_INT_P (XEXP (x, 1))
7558 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7560 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7561 return true;
7563 subcode = GET_CODE (XEXP (x, 0));
7564 if (subcode == ASHIFT || subcode == ASHIFTRT
7565 || subcode == LSHIFTRT
7566 || subcode == ROTATE || subcode == ROTATERT)
7568 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7569 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7570 return true;
7573 if (subcode == MULT
7574 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7576 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7577 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7578 return true;
7581 if (subcode == UMIN || subcode == UMAX
7582 || subcode == SMIN || subcode == SMAX)
7584 *total = COSTS_N_INSNS (3);
7585 return true;
7588 return false;
7590 case MULT:
7591 /* This should have been handled by the CPU specific routines. */
7592 gcc_unreachable ();
7594 case TRUNCATE:
7595 if (arm_arch3m && mode == SImode
7596 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7598 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7599 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7600 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7601 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7603 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7604 return true;
7606 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7607 return false;
7609 case NEG:
7610 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7612 if (TARGET_HARD_FLOAT
7613 && (mode == SFmode
7614 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7616 *total = COSTS_N_INSNS (1);
7617 return false;
7619 *total = COSTS_N_INSNS (2);
7620 return false;
7623 /* Fall through */
7624 case NOT:
7625 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7626 if (mode == SImode && code == NOT)
7628 subcode = GET_CODE (XEXP (x, 0));
7629 if (subcode == ASHIFT || subcode == ASHIFTRT
7630 || subcode == LSHIFTRT
7631 || subcode == ROTATE || subcode == ROTATERT
7632 || (subcode == MULT
7633 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7635 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7636 /* Register shifts cost an extra cycle. */
7637 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7638 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7639 subcode, 1, speed);
7640 return true;
7644 return false;
7646 case IF_THEN_ELSE:
7647 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7649 *total = COSTS_N_INSNS (4);
7650 return true;
7653 operand = XEXP (x, 0);
7655 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7656 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7657 && REG_P (XEXP (operand, 0))
7658 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7659 *total += COSTS_N_INSNS (1);
7660 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7661 + rtx_cost (XEXP (x, 2), code, 2, speed));
7662 return true;
7664 case NE:
7665 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7667 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7668 return true;
7670 goto scc_insn;
7672 case GE:
7673 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7674 && mode == SImode && XEXP (x, 1) == const0_rtx)
7676 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7677 return true;
7679 goto scc_insn;
7681 case LT:
7682 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7683 && mode == SImode && XEXP (x, 1) == const0_rtx)
7685 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7686 return true;
7688 goto scc_insn;
7690 case EQ:
7691 case GT:
7692 case LE:
7693 case GEU:
7694 case LTU:
7695 case GTU:
7696 case LEU:
7697 case UNORDERED:
7698 case ORDERED:
7699 case UNEQ:
7700 case UNGE:
7701 case UNLT:
7702 case UNGT:
7703 case UNLE:
7704 scc_insn:
7705       /* SCC insns.  When the comparison has already been performed, they
7706 	 cost 2 instructions.  Otherwise they need an additional comparison
7707 	 before them.  */
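      /* For illustration (assuming the usual expansion, which is not shown
	 here): r0 = (r1 == r2) is typically emitted as
	     cmp   r1, r2      @ only if the flags are not already set
	     mov   r0, #0
	     moveq r0, #1
	 i.e. two insns once the comparison has been done.  */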
7708 *total = COSTS_N_INSNS (2);
7709 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7711 return true;
7714 /* Fall through */
7715 case COMPARE:
7716 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7718 *total = 0;
7719 return true;
7722 *total += COSTS_N_INSNS (1);
7723 if (CONST_INT_P (XEXP (x, 1))
7724 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7726 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7727 return true;
7730 subcode = GET_CODE (XEXP (x, 0));
7731 if (subcode == ASHIFT || subcode == ASHIFTRT
7732 || subcode == LSHIFTRT
7733 || subcode == ROTATE || subcode == ROTATERT)
7735 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7736 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7737 return true;
7740 if (subcode == MULT
7741 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7743 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7744 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7745 return true;
7748 return false;
7750 case UMIN:
7751 case UMAX:
7752 case SMIN:
7753 case SMAX:
7754 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7755 if (!CONST_INT_P (XEXP (x, 1))
7756 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7757 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7758 return true;
7760 case ABS:
7761 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7763 if (TARGET_HARD_FLOAT
7764 && (mode == SFmode
7765 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7767 *total = COSTS_N_INSNS (1);
7768 return false;
7770 *total = COSTS_N_INSNS (20);
7771 return false;
7773 *total = COSTS_N_INSNS (1);
7774 if (mode == DImode)
7775 *total += COSTS_N_INSNS (3);
7776 return false;
7778 case SIGN_EXTEND:
7779 case ZERO_EXTEND:
7780 *total = 0;
7781 if (GET_MODE_CLASS (mode) == MODE_INT)
7783 rtx op = XEXP (x, 0);
7784 enum machine_mode opmode = GET_MODE (op);
7786 if (mode == DImode)
7787 *total += COSTS_N_INSNS (1);
7789 if (opmode != SImode)
7791 if (MEM_P (op))
7793 /* If !arm_arch4, we use one of the extendhisi2_mem
7794 or movhi_bytes patterns for HImode. For a QImode
7795 sign extension, we first zero-extend from memory
7796 and then perform a shift sequence. */
7797 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7798 *total += COSTS_N_INSNS (2);
7800 else if (arm_arch6)
7801 *total += COSTS_N_INSNS (1);
7803 /* We don't have the necessary insn, so we need to perform some
7804 other operation. */
7805 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7806 /* An and with constant 255. */
7807 *total += COSTS_N_INSNS (1);
7808 else
7809 /* A shift sequence. Increase costs slightly to avoid
7810 combining two shifts into an extend operation. */
7811 *total += COSTS_N_INSNS (2) + 1;
7814 return false;
7817 switch (GET_MODE (XEXP (x, 0)))
7819 case V8QImode:
7820 case V4HImode:
7821 case V2SImode:
7822 case V4QImode:
7823 case V2HImode:
7824 *total = COSTS_N_INSNS (1);
7825 return false;
7827 default:
7828 gcc_unreachable ();
7830 gcc_unreachable ();
7832 case ZERO_EXTRACT:
7833 case SIGN_EXTRACT:
7834 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7835 return true;
7837 case CONST_INT:
7838 if (const_ok_for_arm (INTVAL (x))
7839 || const_ok_for_arm (~INTVAL (x)))
7840 *total = COSTS_N_INSNS (1);
7841 else
7842 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7843 INTVAL (x), NULL_RTX,
7844 NULL_RTX, 0, 0));
7845 return true;
7847 case CONST:
7848 case LABEL_REF:
7849 case SYMBOL_REF:
7850 *total = COSTS_N_INSNS (3);
7851 return true;
7853 case HIGH:
7854 *total = COSTS_N_INSNS (1);
7855 return true;
7857 case LO_SUM:
7858 *total = COSTS_N_INSNS (1);
7859 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7860 return true;
7862 case CONST_DOUBLE:
7863 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7864 && (mode == SFmode || !TARGET_VFP_SINGLE))
7865 *total = COSTS_N_INSNS (1);
7866 else
7867 *total = COSTS_N_INSNS (4);
7868 return true;
7870 case SET:
7871 /* The vec_extract patterns accept memory operands that require an
7872 address reload. Account for the cost of that reload to give the
7873 auto-inc-dec pass an incentive to try to replace them. */
7874 if (TARGET_NEON && MEM_P (SET_DEST (x))
7875 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7877 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7878 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7879 *total += COSTS_N_INSNS (1);
7880 return true;
7882 /* Likewise for the vec_set patterns. */
7883 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7884 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7885 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7887 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7888 *total = rtx_cost (mem, code, 0, speed);
7889 if (!neon_vector_mem_operand (mem, 2))
7890 *total += COSTS_N_INSNS (1);
7891 return true;
7893 return false;
7895 case UNSPEC:
7896 /* We cost this as high as our memory costs to allow this to
7897 be hoisted from loops. */
7898 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7900 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7902 return true;
7904 case CONST_VECTOR:
7905 if (TARGET_NEON
7906 && TARGET_HARD_FLOAT
7907 && outer == SET
7908 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7909 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7910 *total = COSTS_N_INSNS (1);
7911 else
7912 *total = COSTS_N_INSNS (4);
7913 return true;
7915 default:
7916 *total = COSTS_N_INSNS (4);
7917 return false;
7921 /* Estimate the size cost of Thumb-1 instructions.
7922    For now most of the code is copied from thumb1_rtx_costs.  We need more
7923    fine-grained tuning when we have more related test cases.  */
7924 static inline int
7925 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7927 enum machine_mode mode = GET_MODE (x);
7928 int words;
7930 switch (code)
7932 case ASHIFT:
7933 case ASHIFTRT:
7934 case LSHIFTRT:
7935 case ROTATERT:
7936 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7938 case PLUS:
7939 case MINUS:
7940       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
7941 	 patterns generated by RTL expansion, especially when expanding a
7942 	 multiplication.  */
7943 if ((GET_CODE (XEXP (x, 0)) == MULT
7944 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
7945 || (GET_CODE (XEXP (x, 1)) == MULT
7946 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
7947 return COSTS_N_INSNS (2);
7948       /* Deliberately fall through for normal RTX.  */
7949 case COMPARE:
7950 case NEG:
7951 case NOT:
7952 return COSTS_N_INSNS (1);
7954 case MULT:
7955 if (CONST_INT_P (XEXP (x, 1)))
7957           /* The Thumb-1 mul instruction can't operate on a constant; we must
7958              load it into a register first.  */
7959 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7960 return COSTS_N_INSNS (1) + const_size;
7962 return COSTS_N_INSNS (1);
7964 case SET:
7965 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7966 the mode. */
7967 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7968 return (COSTS_N_INSNS (words)
7969 + 4 * ((MEM_P (SET_SRC (x)))
7970 + MEM_P (SET_DEST (x))));
7972 case CONST_INT:
7973 if (outer == SET)
7975 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7976 return COSTS_N_INSNS (1);
7977 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7978 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7979 return COSTS_N_INSNS (2);
7980 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7981 if (thumb_shiftable_const (INTVAL (x)))
7982 return COSTS_N_INSNS (2);
7983 return COSTS_N_INSNS (3);
7985 else if ((outer == PLUS || outer == COMPARE)
7986 && INTVAL (x) < 256 && INTVAL (x) > -256)
7987 return 0;
7988 else if ((outer == IOR || outer == XOR || outer == AND)
7989 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7990 return COSTS_N_INSNS (1);
7991 else if (outer == AND)
7993 int i;
7994 /* This duplicates the tests in the andsi3 expander. */
7995 for (i = 9; i <= 31; i++)
7996 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7997 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7998 return COSTS_N_INSNS (2);
8000 else if (outer == ASHIFT || outer == ASHIFTRT
8001 || outer == LSHIFTRT)
8002 return 0;
8003 return COSTS_N_INSNS (2);
8005 case CONST:
8006 case CONST_DOUBLE:
8007 case LABEL_REF:
8008 case SYMBOL_REF:
8009 return COSTS_N_INSNS (3);
8011 case UDIV:
8012 case UMOD:
8013 case DIV:
8014 case MOD:
8015 return 100;
8017 case TRUNCATE:
8018 return 99;
8020 case AND:
8021 case XOR:
8022 case IOR:
8023 /* XXX guess. */
8024 return 8;
8026 case MEM:
8027 /* XXX another guess. */
8028 /* Memory costs quite a lot for the first word, but subsequent words
8029 load at the equivalent of a single insn each. */
8030 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8031 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8032 ? 4 : 0));
8034 case IF_THEN_ELSE:
8035 /* XXX a guess. */
8036 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8037 return 14;
8038 return 2;
8040 case ZERO_EXTEND:
8041 /* XXX still guessing. */
8042 switch (GET_MODE (XEXP (x, 0)))
8044 case QImode:
8045 return (1 + (mode == DImode ? 4 : 0)
8046 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8048 case HImode:
8049 return (4 + (mode == DImode ? 4 : 0)
8050 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8052 case SImode:
8053 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8055 default:
8056 return 99;
8059 default:
8060 return 99;
8064 /* RTX costs when optimizing for size. */
8065 static bool
8066 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8067 int *total)
8069 enum machine_mode mode = GET_MODE (x);
8070 if (TARGET_THUMB1)
8072 *total = thumb1_size_rtx_costs (x, code, outer_code);
8073 return true;
8076 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8077 switch (code)
8079 case MEM:
8080 /* A memory access costs 1 insn if the mode is small, or the address is
8081 a single register, otherwise it costs one insn per word. */
8082 if (REG_P (XEXP (x, 0)))
8083 *total = COSTS_N_INSNS (1);
8084 else if (flag_pic
8085 && GET_CODE (XEXP (x, 0)) == PLUS
8086 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8087 /* This will be split into two instructions.
8088 See arm.md:calculate_pic_address. */
8089 *total = COSTS_N_INSNS (2);
8090 else
8091 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8092 return true;
8094 case DIV:
8095 case MOD:
8096 case UDIV:
8097 case UMOD:
8098 /* Needs a libcall, so it costs about this. */
8099 *total = COSTS_N_INSNS (2);
8100 return false;
8102 case ROTATE:
8103 if (mode == SImode && REG_P (XEXP (x, 1)))
8105 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8106 return true;
8108 /* Fall through */
8109 case ROTATERT:
8110 case ASHIFT:
8111 case LSHIFTRT:
8112 case ASHIFTRT:
8113 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8115 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8116 return true;
8118 else if (mode == SImode)
8120 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8121 /* Slightly disparage register shifts, but not by much. */
8122 if (!CONST_INT_P (XEXP (x, 1)))
8123 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8124 return true;
8127 /* Needs a libcall. */
8128 *total = COSTS_N_INSNS (2);
8129 return false;
8131 case MINUS:
8132 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8133 && (mode == SFmode || !TARGET_VFP_SINGLE))
8135 *total = COSTS_N_INSNS (1);
8136 return false;
8139 if (mode == SImode)
8141 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8142 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8144 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8145 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8146 || subcode1 == ROTATE || subcode1 == ROTATERT
8147 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8148 || subcode1 == ASHIFTRT)
8150 /* It's just the cost of the two operands. */
8151 *total = 0;
8152 return false;
8155 *total = COSTS_N_INSNS (1);
8156 return false;
8159 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8160 return false;
8162 case PLUS:
8163 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8164 && (mode == SFmode || !TARGET_VFP_SINGLE))
8166 *total = COSTS_N_INSNS (1);
8167 return false;
8170 /* A shift as a part of ADD costs nothing. */
8171 if (GET_CODE (XEXP (x, 0)) == MULT
8172 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8174 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8175 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8176 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8177 return true;
8180 /* Fall through */
8181 case AND: case XOR: case IOR:
8182 if (mode == SImode)
8184 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8186 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8187 || subcode == LSHIFTRT || subcode == ASHIFTRT
8188 || (code == AND && subcode == NOT))
8190 /* It's just the cost of the two operands. */
8191 *total = 0;
8192 return false;
8196 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8197 return false;
8199 case MULT:
8200 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8201 return false;
8203 case NEG:
8204 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8205 && (mode == SFmode || !TARGET_VFP_SINGLE))
8207 *total = COSTS_N_INSNS (1);
8208 return false;
8211 /* Fall through */
8212 case NOT:
8213 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8215 return false;
8217 case IF_THEN_ELSE:
8218 *total = 0;
8219 return false;
8221 case COMPARE:
8222 if (cc_register (XEXP (x, 0), VOIDmode))
8223 * total = 0;
8224 else
8225 *total = COSTS_N_INSNS (1);
8226 return false;
8228 case ABS:
8229 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8230 && (mode == SFmode || !TARGET_VFP_SINGLE))
8231 *total = COSTS_N_INSNS (1);
8232 else
8233 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8234 return false;
8236 case SIGN_EXTEND:
8237 case ZERO_EXTEND:
8238 return arm_rtx_costs_1 (x, outer_code, total, 0);
8240 case CONST_INT:
8241 if (const_ok_for_arm (INTVAL (x)))
8242 /* A multiplication by a constant requires another instruction
8243 to load the constant to a register. */
8244 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8245 ? 1 : 0);
8246 else if (const_ok_for_arm (~INTVAL (x)))
8247 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8248 else if (const_ok_for_arm (-INTVAL (x)))
8250 if (outer_code == COMPARE || outer_code == PLUS
8251 || outer_code == MINUS)
8252 *total = 0;
8253 else
8254 *total = COSTS_N_INSNS (1);
8256 else
8257 *total = COSTS_N_INSNS (2);
8258 return true;
8260 case CONST:
8261 case LABEL_REF:
8262 case SYMBOL_REF:
8263 *total = COSTS_N_INSNS (2);
8264 return true;
8266 case CONST_DOUBLE:
8267 *total = COSTS_N_INSNS (4);
8268 return true;
8270 case CONST_VECTOR:
8271 if (TARGET_NEON
8272 && TARGET_HARD_FLOAT
8273 && outer_code == SET
8274 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8275 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8276 *total = COSTS_N_INSNS (1);
8277 else
8278 *total = COSTS_N_INSNS (4);
8279 return true;
8281 case HIGH:
8282 case LO_SUM:
8283 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8284 cost of these slightly. */
8285 *total = COSTS_N_INSNS (1) + 1;
8286 return true;
8288 case SET:
8289 return false;
8291 default:
8292 if (mode != VOIDmode)
8293 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8294 else
8295 	*total = COSTS_N_INSNS (4); /* Who knows?  */
8296 return false;
8300 /* RTX costs.  Dispatch to the size or per-core speed cost routines.  */
8301 static bool
8302 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8303 int *total, bool speed)
8305 if (!speed)
8306 return arm_size_rtx_costs (x, (enum rtx_code) code,
8307 (enum rtx_code) outer_code, total);
8308 else
8309 return current_tune->rtx_costs (x, (enum rtx_code) code,
8310 (enum rtx_code) outer_code,
8311 total, speed);
8314 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8315 supported on any "slowmul" cores, so it can be ignored. */
8317 static bool
8318 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8319 int *total, bool speed)
8321 enum machine_mode mode = GET_MODE (x);
8323 if (TARGET_THUMB)
8325 *total = thumb1_rtx_costs (x, code, outer_code);
8326 return true;
8329 switch (code)
8331 case MULT:
8332 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8333 || mode == DImode)
8335 *total = COSTS_N_INSNS (20);
8336 return false;
8339 if (CONST_INT_P (XEXP (x, 1)))
8341 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8342 & (unsigned HOST_WIDE_INT) 0xffffffff);
8343 int cost, const_ok = const_ok_for_arm (i);
8344 int j, booth_unit_size;
8346 /* Tune as appropriate. */
8347 cost = const_ok ? 4 : 8;
8348 booth_unit_size = 2;
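	  /* Worked example of the loop below: multiplying by 10 (binary 1010,
	     which is const_ok) starts from cost 4 and needs two 2-bit Booth
	     steps before I reaches zero (10 -> 2 -> 0), giving
	     COSTS_N_INSNS (6).  */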
8349 for (j = 0; i && j < 32; j += booth_unit_size)
8351 i >>= booth_unit_size;
8352 cost++;
8355 *total = COSTS_N_INSNS (cost);
8356 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8357 return true;
8360 *total = COSTS_N_INSNS (20);
8361 return false;
8363 default:
8364       return arm_rtx_costs_1 (x, outer_code, total, speed);
8369 /* RTX cost for cores with a fast multiply unit (M variants). */
8371 static bool
8372 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8373 int *total, bool speed)
8375 enum machine_mode mode = GET_MODE (x);
8377 if (TARGET_THUMB1)
8379 *total = thumb1_rtx_costs (x, code, outer_code);
8380 return true;
8383 /* ??? should thumb2 use different costs? */
8384 switch (code)
8386 case MULT:
8387 /* There is no point basing this on the tuning, since it is always the
8388 fast variant if it exists at all. */
8389 if (mode == DImode
8390 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8391 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8392 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8394 *total = COSTS_N_INSNS(2);
8395 return false;
8399 if (mode == DImode)
8401 *total = COSTS_N_INSNS (5);
8402 return false;
8405 if (CONST_INT_P (XEXP (x, 1)))
8407 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8408 & (unsigned HOST_WIDE_INT) 0xffffffff);
8409 int cost, const_ok = const_ok_for_arm (i);
8410 int j, booth_unit_size;
8412 /* Tune as appropriate. */
8413 cost = const_ok ? 4 : 8;
8414 booth_unit_size = 8;
8415 for (j = 0; i && j < 32; j += booth_unit_size)
8417 i >>= booth_unit_size;
8418 cost++;
8421 *total = COSTS_N_INSNS(cost);
8422 return false;
8425 if (mode == SImode)
8427 *total = COSTS_N_INSNS (4);
8428 return false;
8431 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8433 if (TARGET_HARD_FLOAT
8434 && (mode == SFmode
8435 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8437 *total = COSTS_N_INSNS (1);
8438 return false;
8442 /* Requires a lib call */
8443 *total = COSTS_N_INSNS (20);
8444 return false;
8446 default:
8447 return arm_rtx_costs_1 (x, outer_code, total, speed);
8452 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8453 so it can be ignored. */
8455 static bool
8456 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8457 int *total, bool speed)
8459 enum machine_mode mode = GET_MODE (x);
8461 if (TARGET_THUMB)
8463 *total = thumb1_rtx_costs (x, code, outer_code);
8464 return true;
8467 switch (code)
8469 case COMPARE:
8470 if (GET_CODE (XEXP (x, 0)) != MULT)
8471 return arm_rtx_costs_1 (x, outer_code, total, speed);
8473 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8474 will stall until the multiplication is complete. */
8475 *total = COSTS_N_INSNS (3);
8476 return false;
8478 case MULT:
8479 /* There is no point basing this on the tuning, since it is always the
8480 fast variant if it exists at all. */
8481 if (mode == DImode
8482 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8483 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8484 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8486 *total = COSTS_N_INSNS (2);
8487 return false;
8491 if (mode == DImode)
8493 *total = COSTS_N_INSNS (5);
8494 return false;
8497 if (CONST_INT_P (XEXP (x, 1)))
8499 /* If operand 1 is a constant we can more accurately
8500 calculate the cost of the multiply. The multiplier can
8501 retire 15 bits on the first cycle and a further 12 on the
8502 second. We do, of course, have to load the constant into
8503 a register first. */
8504 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8505 /* There's a general overhead of one cycle. */
8506 int cost = 1;
8507 unsigned HOST_WIDE_INT masked_const;
8509 if (i & 0x80000000)
8510 i = ~i;
8512 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8514 masked_const = i & 0xffff8000;
8515 if (masked_const != 0)
8517 cost++;
8518 masked_const = i & 0xf8000000;
8519 if (masked_const != 0)
8520 cost++;
8522 *total = COSTS_N_INSNS (cost);
8523 return false;
8526 if (mode == SImode)
8528 *total = COSTS_N_INSNS (3);
8529 return false;
8532 /* Requires a lib call */
8533 *total = COSTS_N_INSNS (20);
8534 return false;
8536 default:
8537 return arm_rtx_costs_1 (x, outer_code, total, speed);
8542 /* RTX costs for 9e (and later) cores. */
8544 static bool
8545 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8546 int *total, bool speed)
8548 enum machine_mode mode = GET_MODE (x);
8550 if (TARGET_THUMB1)
8552 switch (code)
8554 case MULT:
8555 *total = COSTS_N_INSNS (3);
8556 return true;
8558 default:
8559 *total = thumb1_rtx_costs (x, code, outer_code);
8560 return true;
8564 switch (code)
8566 case MULT:
8567 /* There is no point basing this on the tuning, since it is always the
8568 fast variant if it exists at all. */
8569 if (mode == DImode
8570 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8571 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8572 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8574 *total = COSTS_N_INSNS (2);
8575 return false;
8579 if (mode == DImode)
8581 *total = COSTS_N_INSNS (5);
8582 return false;
8585 if (mode == SImode)
8587 *total = COSTS_N_INSNS (2);
8588 return false;
8591 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8593 if (TARGET_HARD_FLOAT
8594 && (mode == SFmode
8595 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8597 *total = COSTS_N_INSNS (1);
8598 return false;
8602 *total = COSTS_N_INSNS (20);
8603 return false;
8605 default:
8606 return arm_rtx_costs_1 (x, outer_code, total, speed);
8609 /* Address computations that can be folded into the addressing mode are
8610    effectively free, but rtx_cost returns much the same value for practically
8611    all of them.  So we weight the different types of address here in the
8612    order (most preferred first):
8612    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
8613 static inline int
8614 arm_arm_address_cost (rtx x)
8616 enum rtx_code c = GET_CODE (x);
8618 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8619 return 0;
8620 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8621 return 10;
8623 if (c == PLUS)
8625 if (CONST_INT_P (XEXP (x, 1)))
8626 return 2;
8628 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8629 return 3;
8631 return 4;
8634 return 6;
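/* For illustration, the weights returned above are:
     pre/post increment or decrement       0
     reg plus constant offset              2
     reg plus shifted/arithmetic index     3
     reg plus reg                          4
     plain reg (and anything else)         6
     MEM, LABEL_REF or SYMBOL_REF         10  */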
8637 static inline int
8638 arm_thumb_address_cost (rtx x)
8640 enum rtx_code c = GET_CODE (x);
8642 if (c == REG)
8643 return 1;
8644 if (c == PLUS
8645 && REG_P (XEXP (x, 0))
8646 && CONST_INT_P (XEXP (x, 1)))
8647 return 1;
8649 return 2;
8652 static int
8653 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8654 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8656 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8659 /* Adjust cost hook for XScale. */
8660 static bool
8661 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8663 /* Some true dependencies can have a higher cost depending
8664 on precisely how certain input operands are used. */
8665 if (REG_NOTE_KIND(link) == 0
8666 && recog_memoized (insn) >= 0
8667 && recog_memoized (dep) >= 0)
8669 int shift_opnum = get_attr_shift (insn);
8670 enum attr_type attr_type = get_attr_type (dep);
8672 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8673 operand for INSN. If we have a shifted input operand and the
8674 instruction we depend on is another ALU instruction, then we may
8675 have to account for an additional stall. */
8676 if (shift_opnum != 0
8677 && (attr_type == TYPE_ALU_SHIFT_IMM
8678 || attr_type == TYPE_ALUS_SHIFT_IMM
8679 || attr_type == TYPE_LOGIC_SHIFT_IMM
8680 || attr_type == TYPE_LOGICS_SHIFT_IMM
8681 || attr_type == TYPE_ALU_SHIFT_REG
8682 || attr_type == TYPE_ALUS_SHIFT_REG
8683 || attr_type == TYPE_LOGIC_SHIFT_REG
8684 || attr_type == TYPE_LOGICS_SHIFT_REG
8685 || attr_type == TYPE_MOV_SHIFT
8686 || attr_type == TYPE_MVN_SHIFT
8687 || attr_type == TYPE_MOV_SHIFT_REG
8688 || attr_type == TYPE_MVN_SHIFT_REG))
8690 rtx shifted_operand;
8691 int opno;
8693 /* Get the shifted operand. */
8694 extract_insn (insn);
8695 shifted_operand = recog_data.operand[shift_opnum];
8697 /* Iterate over all the operands in DEP. If we write an operand
8698 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
8699 cost of this dependency. */
8700 extract_insn (dep);
8701 preprocess_constraints ();
8702 for (opno = 0; opno < recog_data.n_operands; opno++)
8704 /* We can ignore strict inputs. */
8705 if (recog_data.operand_type[opno] == OP_IN)
8706 continue;
8708 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8709 shifted_operand))
8711 *cost = 2;
8712 return false;
8717 return true;
8720 /* Adjust cost hook for Cortex A9. */
8721 static bool
8722 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8724 switch (REG_NOTE_KIND (link))
8726 case REG_DEP_ANTI:
8727 *cost = 0;
8728 return false;
8730 case REG_DEP_TRUE:
8731 case REG_DEP_OUTPUT:
8732 if (recog_memoized (insn) >= 0
8733 && recog_memoized (dep) >= 0)
8735 if (GET_CODE (PATTERN (insn)) == SET)
8737 if (GET_MODE_CLASS
8738 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8739 || GET_MODE_CLASS
8740 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8742 enum attr_type attr_type_insn = get_attr_type (insn);
8743 enum attr_type attr_type_dep = get_attr_type (dep);
8745 /* By default all dependencies of the form
8746 s0 = s0 <op> s1
8747 s0 = s0 <op> s2
8748 have an extra latency of 1 cycle because
8749 of the input and output dependency in this
8750 		 case.  However, this gets modeled as a true
8751 		 dependency, hence all these checks.  */
8752 if (REG_P (SET_DEST (PATTERN (insn)))
8753 && REG_P (SET_DEST (PATTERN (dep)))
8754 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8755 SET_DEST (PATTERN (dep))))
8757 /* FMACS is a special case where the dependent
8758 instruction can be issued 3 cycles before
8759 the normal latency in case of an output
8760 dependency. */
8761 if ((attr_type_insn == TYPE_FMACS
8762 || attr_type_insn == TYPE_FMACD)
8763 && (attr_type_dep == TYPE_FMACS
8764 || attr_type_dep == TYPE_FMACD))
8766 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8767 *cost = insn_default_latency (dep) - 3;
8768 else
8769 *cost = insn_default_latency (dep);
8770 return false;
8772 else
8774 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8775 *cost = insn_default_latency (dep) + 1;
8776 else
8777 *cost = insn_default_latency (dep);
8779 return false;
8784 break;
8786 default:
8787 gcc_unreachable ();
8790 return true;
8793 /* Adjust cost hook for FA726TE. */
8794 static bool
8795 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8797   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8798      followed by a predicated one) has a penalty of 3.  */
8799 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8800 && recog_memoized (insn) >= 0
8801 && recog_memoized (dep) >= 0
8802 && get_attr_conds (dep) == CONDS_SET)
8804 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8805 if (get_attr_conds (insn) == CONDS_USE
8806 && get_attr_type (insn) != TYPE_BRANCH)
8808 *cost = 3;
8809 return false;
8812 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8813 || get_attr_conds (insn) == CONDS_USE)
8815 *cost = 0;
8816 return false;
8820 return true;
8823 /* Implement TARGET_REGISTER_MOVE_COST.
8825 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8826    such a move is typically more expensive than a single memory access.  We set
8827 the cost to less than two memory accesses so that floating
8828 point to integer conversion does not go through memory. */
8831 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8832 reg_class_t from, reg_class_t to)
8834 if (TARGET_32BIT)
8836 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8837 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8838 return 15;
8839 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8840 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8841 return 4;
8842 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8843 return 20;
8844 else
8845 return 2;
8847 else
8849 if (from == HI_REGS || to == HI_REGS)
8850 return 4;
8851 else
8852 return 2;
8856 /* Implement TARGET_MEMORY_MOVE_COST. */
8859 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8860 bool in ATTRIBUTE_UNUSED)
8862 if (TARGET_32BIT)
8863 return 10;
8864 else
8866 if (GET_MODE_SIZE (mode) < 4)
8867 return 8;
8868 else
8869 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
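/* For example, on Thumb-1 (not TARGET_32BIT) moving an SImode value between
   memory and LO_REGS costs 2 * 4 * 1 = 8, while moving it to or from any
   other class costs 16; sub-word modes cost 8 regardless of the class.  */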
8873 /* Vectorizer cost model implementation. */
8875 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8876 static int
8877 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8878 tree vectype,
8879 int misalign ATTRIBUTE_UNUSED)
8881 unsigned elements;
8883 switch (type_of_cost)
8885 case scalar_stmt:
8886 return current_tune->vec_costs->scalar_stmt_cost;
8888 case scalar_load:
8889 return current_tune->vec_costs->scalar_load_cost;
8891 case scalar_store:
8892 return current_tune->vec_costs->scalar_store_cost;
8894 case vector_stmt:
8895 return current_tune->vec_costs->vec_stmt_cost;
8897 case vector_load:
8898 return current_tune->vec_costs->vec_align_load_cost;
8900 case vector_store:
8901 return current_tune->vec_costs->vec_store_cost;
8903 case vec_to_scalar:
8904 return current_tune->vec_costs->vec_to_scalar_cost;
8906 case scalar_to_vec:
8907 return current_tune->vec_costs->scalar_to_vec_cost;
8909 case unaligned_load:
8910 return current_tune->vec_costs->vec_unalign_load_cost;
8912 case unaligned_store:
8913 return current_tune->vec_costs->vec_unalign_store_cost;
8915 case cond_branch_taken:
8916 return current_tune->vec_costs->cond_taken_branch_cost;
8918 case cond_branch_not_taken:
8919 return current_tune->vec_costs->cond_not_taken_branch_cost;
8921 case vec_perm:
8922 case vec_promote_demote:
8923 return current_tune->vec_costs->vec_stmt_cost;
8925 case vec_construct:
8926 elements = TYPE_VECTOR_SUBPARTS (vectype);
8927 return elements / 2 + 1;
8929 default:
8930 gcc_unreachable ();
8934 /* Implement targetm.vectorize.add_stmt_cost. */
8936 static unsigned
8937 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8938 struct _stmt_vec_info *stmt_info, int misalign,
8939 enum vect_cost_model_location where)
8941 unsigned *cost = (unsigned *) data;
8942 unsigned retval = 0;
8944 if (flag_vect_cost_model)
8946 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8947 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8949 /* Statements in an inner loop relative to the loop being
8950 vectorized are weighted more heavily. The value here is
8951 arbitrary and could potentially be improved with analysis. */
8952 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8953 count *= 50; /* FIXME. */
8955 retval = (unsigned) (count * stmt_cost);
8956 cost[where] += retval;
8959 return retval;
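/* For example, with the cost model enabled, a vector_load statement inside
   the inner loop of a nested loop body is accumulated into cost[vect_body]
   as 50 * vec_align_load_cost rather than just vec_align_load_cost.  */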
8962 /* Return true if and only if this insn can dual-issue only as older. */
8963 static bool
8964 cortexa7_older_only (rtx insn)
8966 if (recog_memoized (insn) < 0)
8967 return false;
8969 switch (get_attr_type (insn))
8971 case TYPE_ALU_REG:
8972 case TYPE_ALUS_REG:
8973 case TYPE_LOGIC_REG:
8974 case TYPE_LOGICS_REG:
8975 case TYPE_ADC_REG:
8976 case TYPE_ADCS_REG:
8977 case TYPE_ADR:
8978 case TYPE_BFM:
8979 case TYPE_REV:
8980 case TYPE_MVN_REG:
8981 case TYPE_SHIFT_IMM:
8982 case TYPE_SHIFT_REG:
8983 case TYPE_LOAD_BYTE:
8984 case TYPE_LOAD1:
8985 case TYPE_STORE1:
8986 case TYPE_FFARITHS:
8987 case TYPE_FADDS:
8988 case TYPE_FFARITHD:
8989 case TYPE_FADDD:
8990 case TYPE_FMOV:
8991 case TYPE_F_CVT:
8992 case TYPE_FCMPS:
8993 case TYPE_FCMPD:
8994 case TYPE_FCONSTS:
8995 case TYPE_FCONSTD:
8996 case TYPE_FMULS:
8997 case TYPE_FMACS:
8998 case TYPE_FMULD:
8999 case TYPE_FMACD:
9000 case TYPE_FDIVS:
9001 case TYPE_FDIVD:
9002 case TYPE_F_MRC:
9003 case TYPE_F_MRRC:
9004 case TYPE_F_FLAG:
9005 case TYPE_F_LOADS:
9006 case TYPE_F_STORES:
9007 return true;
9008 default:
9009 return false;
9013 /* Return true if and only if this insn can dual-issue as younger. */
9014 static bool
9015 cortexa7_younger (FILE *file, int verbose, rtx insn)
9017 if (recog_memoized (insn) < 0)
9019 if (verbose > 5)
9020 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
9021 return false;
9024 switch (get_attr_type (insn))
9026 case TYPE_ALU_IMM:
9027 case TYPE_ALUS_IMM:
9028 case TYPE_LOGIC_IMM:
9029 case TYPE_LOGICS_IMM:
9030 case TYPE_EXTEND:
9031 case TYPE_MVN_IMM:
9032 case TYPE_MOV_IMM:
9033 case TYPE_MOV_REG:
9034 case TYPE_MOV_SHIFT:
9035 case TYPE_MOV_SHIFT_REG:
9036 case TYPE_BRANCH:
9037 case TYPE_CALL:
9038 return true;
9039 default:
9040 return false;
9045 /* Look for an instruction that can dual issue only as an older
9046 instruction, and move it in front of any instructions that can
9047 dual-issue as younger, while preserving the relative order of all
9048    other instructions in the ready list.  This is a heuristic to help
9049 dual-issue in later cycles, by postponing issue of more flexible
9050 instructions. This heuristic may affect dual issue opportunities
9051 in the current cycle. */
9052 static void
9053 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9054 int clock)
9056 int i;
9057 int first_older_only = -1, first_younger = -1;
9059 if (verbose > 5)
9060 fprintf (file,
9061 ";; sched_reorder for cycle %d with %d insns in ready list\n",
9062 clock,
9063 *n_readyp);
9065 /* Traverse the ready list from the head (the instruction to issue
9066      first), looking for the first instruction that can issue as
9067 younger and the first instruction that can dual-issue only as
9068 older. */
9069 for (i = *n_readyp - 1; i >= 0; i--)
9071 rtx insn = ready[i];
9072 if (cortexa7_older_only (insn))
9074 first_older_only = i;
9075 if (verbose > 5)
9076 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
9077 break;
9079 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
9080 first_younger = i;
9083   /* Nothing to reorder: either no younger insn was found, or the insn
9084      that can dual-issue only as older already appears before any insn
9085      that can dual-issue as younger.  */
9086 if (first_younger == -1)
9088 if (verbose > 5)
9089 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
9090 return;
9093 /* Nothing to reorder because no older-only insn in the ready list. */
9094 if (first_older_only == -1)
9096 if (verbose > 5)
9097 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
9098 return;
9101 /* Move first_older_only insn before first_younger. */
9102 if (verbose > 5)
9103 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
9104 INSN_UID(ready [first_older_only]),
9105 INSN_UID(ready [first_younger]));
9106 rtx first_older_only_insn = ready [first_older_only];
9107 for (i = first_older_only; i < first_younger; i++)
9109 ready[i] = ready[i+1];
9112 ready[i] = first_older_only_insn;
9113 return;
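/* Sketch of the effect: if the insn due to issue next is classified as
   younger above (say TYPE_ALU_IMM) and an older-only insn such as a load
   (TYPE_LOAD1) sits just behind it, the code above moves the load in front,
   keeping the more flexible insn available to pair as the younger half of a
   later dual-issue cycle.  */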
9116 /* Implement TARGET_SCHED_REORDER. */
9117 static int
9118 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9119 int clock)
9121 switch (arm_tune)
9123 case cortexa7:
9124 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9125 break;
9126 default:
9127 /* Do nothing for other cores. */
9128 break;
9131 return arm_issue_rate ();
9134 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9135 It corrects the value of COST based on the relationship between
9136 INSN and DEP through the dependence LINK. It returns the new
9137 value. There is a per-core adjust_cost hook to adjust scheduler costs
9138 and the per-core hook can choose to completely override the generic
9139 adjust_cost function. Only put bits of code into arm_adjust_cost that
9140 are common across all cores. */
9141 static int
9142 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9144 rtx i_pat, d_pat;
9146 /* When generating Thumb-1 code, we want to place flag-setting operations
9147 close to a conditional branch which depends on them, so that we can
9148 omit the comparison. */
9149 if (TARGET_THUMB1
9150 && REG_NOTE_KIND (link) == 0
9151 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9152 && recog_memoized (dep) >= 0
9153 && get_attr_conds (dep) == CONDS_SET)
9154 return 0;
9156 if (current_tune->sched_adjust_cost != NULL)
9158 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9159 return cost;
9162 /* XXX Is this strictly true? */
9163 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9164 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9165 return 0;
9167 /* Call insns don't incur a stall, even if they follow a load. */
9168 if (REG_NOTE_KIND (link) == 0
9169 && CALL_P (insn))
9170 return 1;
9172 if ((i_pat = single_set (insn)) != NULL
9173 && MEM_P (SET_SRC (i_pat))
9174 && (d_pat = single_set (dep)) != NULL
9175 && MEM_P (SET_DEST (d_pat)))
9177 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9178       /* This is a load after a store; there is no conflict if the load reads
9179 	 from a cached area.  Assume that loads from the stack and from the
9180 	 constant pool are cached, and that others will miss.  This is a
9181 	 hack.  */
9183 if ((GET_CODE (src_mem) == SYMBOL_REF
9184 && CONSTANT_POOL_ADDRESS_P (src_mem))
9185 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9186 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9187 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9188 return 1;
9191 return cost;
9195 arm_max_conditional_execute (void)
9197 return max_insns_skipped;
9200 static int
9201 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9203 if (TARGET_32BIT)
9204 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9205 else
9206 return (optimize > 0) ? 2 : 0;
9209 static int
9210 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9212 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9215 static bool fp_consts_inited = false;
9217 static REAL_VALUE_TYPE value_fp0;
9219 static void
9220 init_fp_table (void)
9222 REAL_VALUE_TYPE r;
9224 r = REAL_VALUE_ATOF ("0", DFmode);
9225 value_fp0 = r;
9226 fp_consts_inited = true;
9229 /* Return TRUE if rtx X is a valid immediate FP constant. */
9231 arm_const_double_rtx (rtx x)
9233 REAL_VALUE_TYPE r;
9235 if (!fp_consts_inited)
9236 init_fp_table ();
9238 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9239 if (REAL_VALUE_MINUS_ZERO (r))
9240 return 0;
9242 if (REAL_VALUES_EQUAL (r, value_fp0))
9243 return 1;
9245 return 0;
9248 /* VFPv3 has a fairly wide range of representable immediates, formed from
9249 "quarter-precision" floating-point values. These can be evaluated using this
9250 formula (with ^ for exponentiation):
9252 -1^s * n * 2^-r
9254 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9255 16 <= n <= 31 and 0 <= r <= 7.
9257 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9259 - A (most-significant) is the sign bit.
9260 - BCD are the exponent (encoded as r XOR 3).
9261 - EFGH are the mantissa (encoded as n - 16).
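   As a worked example of this encoding: +1.0 = 16 * 2^-4 (s = 0, n = 16,
   r = 4) gives ABCDEFGH = 0 111 0000 = 0x70, and -0.5 = -(16 * 2^-5)
   (s = 1, n = 16, r = 5) gives 1 110 0000 = 0xE0.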
9264 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9265 fconst[sd] instruction, or -1 if X isn't suitable. */
9266 static int
9267 vfp3_const_double_index (rtx x)
9269 REAL_VALUE_TYPE r, m;
9270 int sign, exponent;
9271 unsigned HOST_WIDE_INT mantissa, mant_hi;
9272 unsigned HOST_WIDE_INT mask;
9273 HOST_WIDE_INT m1, m2;
9274 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9276 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9277 return -1;
9279 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9281 /* We can't represent these things, so detect them first. */
9282 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9283 return -1;
9285 /* Extract sign, exponent and mantissa. */
9286 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9287 r = real_value_abs (&r);
9288 exponent = REAL_EXP (&r);
9289 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9290 highest (sign) bit, with a fixed binary point at bit point_pos.
9291 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9292 bits for the mantissa, this may fail (low bits would be lost). */
9293 real_ldexp (&m, &r, point_pos - exponent);
9294 REAL_VALUE_TO_INT (&m1, &m2, m);
9295 mantissa = m1;
9296 mant_hi = m2;
9298 /* If there are bits set in the low part of the mantissa, we can't
9299 represent this value. */
9300 if (mantissa != 0)
9301 return -1;
9303 /* Now make it so that mantissa contains the most-significant bits, and move
9304 the point_pos to indicate that the least-significant bits have been
9305 discarded. */
9306 point_pos -= HOST_BITS_PER_WIDE_INT;
9307 mantissa = mant_hi;
9309 /* We can permit four significant bits of mantissa only, plus a high bit
9310 which is always 1. */
9311 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9312 if ((mantissa & mask) != 0)
9313 return -1;
9315 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9316 mantissa >>= point_pos - 5;
9318 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9319 floating-point immediate zero with Neon using an integer-zero load, but
9320 that case is handled elsewhere.) */
9321 if (mantissa == 0)
9322 return -1;
9324 gcc_assert (mantissa >= 16 && mantissa <= 31);
9326 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9327      normalized significands are in the range [1, 2)).  (Our mantissa is shifted
9328 left 4 places at this point relative to normalized IEEE754 values). GCC
9329 internally uses [0.5, 1) (see real.c), so the exponent returned from
9330 REAL_EXP must be altered. */
9331 exponent = 5 - exponent;
9333 if (exponent < 0 || exponent > 7)
9334 return -1;
9336 /* Sign, mantissa and exponent are now in the correct form to plug into the
9337 formula described in the comment above. */
9338 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9341 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9343 vfp3_const_double_rtx (rtx x)
9345 if (!TARGET_VFP3)
9346 return 0;
9348 return vfp3_const_double_index (x) != -1;
9351 /* Recognize immediates which can be used in various Neon instructions. Legal
9352 immediates are described by the following table (for VMVN variants, the
9353 bitwise inverse of the constant shown is recognized. In either case, VMOV
9354 is output and the correct instruction to use for a given constant is chosen
9355 by the assembler). The constant shown is replicated across all elements of
9356 the destination vector.
9358 insn elems variant constant (binary)
9359 ---- ----- ------- -----------------
9360 vmov i32 0 00000000 00000000 00000000 abcdefgh
9361 vmov i32 1 00000000 00000000 abcdefgh 00000000
9362 vmov i32 2 00000000 abcdefgh 00000000 00000000
9363 vmov i32 3 abcdefgh 00000000 00000000 00000000
9364 vmov i16 4 00000000 abcdefgh
9365 vmov i16 5 abcdefgh 00000000
9366 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9367 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9368 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9369 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9370 vmvn i16 10 00000000 abcdefgh
9371 vmvn i16 11 abcdefgh 00000000
9372 vmov i32 12 00000000 00000000 abcdefgh 11111111
9373 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9374 vmov i32 14 00000000 abcdefgh 11111111 11111111
9375 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9376 vmov i8 16 abcdefgh
9377 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9378 eeeeeeee ffffffff gggggggg hhhhhhhh
9379 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9380 vmov f32 19 00000000 00000000 00000000 00000000
9382 For case 18, B = !b. Representable values are exactly those accepted by
9383 vfp3_const_double_index, but are output as floating-point numbers rather
9384 than indices.
9386 For case 19, we will change it to vmov.i32 when assembling.
9388 Variants 0-5 (inclusive) may also be used as immediates for the second
9389 operand of VORR/VBIC instructions.
9391 The INVERSE argument causes the bitwise inverse of the given operand to be
9392 recognized instead (used for recognizing legal immediates for the VAND/VORN
9393 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9394 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9395 output, rather than the real insns vbic/vorr).
9397 INVERSE makes no difference to the recognition of float vectors.
9399 The return value is the variant of immediate as shown in the above table, or
9400 -1 if the given value doesn't match any of the listed patterns.
9402 static int
9403 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9404 rtx *modconst, int *elementwidth)
9406 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9407 matches = 1; \
9408 for (i = 0; i < idx; i += (STRIDE)) \
9409 if (!(TEST)) \
9410 matches = 0; \
9411 if (matches) \
9413 immtype = (CLASS); \
9414 elsize = (ELSIZE); \
9415 break; \
9418 unsigned int i, elsize = 0, idx = 0, n_elts;
9419 unsigned int innersize;
9420 unsigned char bytes[16];
9421 int immtype = -1, matches;
9422 unsigned int invmask = inverse ? 0xff : 0;
9423 bool vector = GET_CODE (op) == CONST_VECTOR;
9425 if (vector)
9427 n_elts = CONST_VECTOR_NUNITS (op);
9428 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9430 else
9432 n_elts = 1;
9433 if (mode == VOIDmode)
9434 mode = DImode;
9435 innersize = GET_MODE_SIZE (mode);
9438 /* Vectors of float constants. */
9439 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9441 rtx el0 = CONST_VECTOR_ELT (op, 0);
9442 REAL_VALUE_TYPE r0;
9444 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9445 return -1;
9447 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9449 for (i = 1; i < n_elts; i++)
9451 rtx elt = CONST_VECTOR_ELT (op, i);
9452 REAL_VALUE_TYPE re;
9454 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9456 if (!REAL_VALUES_EQUAL (r0, re))
9457 return -1;
9460 if (modconst)
9461 *modconst = CONST_VECTOR_ELT (op, 0);
9463 if (elementwidth)
9464 *elementwidth = 0;
9466 if (el0 == CONST0_RTX (GET_MODE (el0)))
9467 return 19;
9468 else
9469 return 18;
9472 /* Splat vector constant out into a byte vector. */
9473 for (i = 0; i < n_elts; i++)
9475 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9476 unsigned HOST_WIDE_INT elpart;
9477 unsigned int part, parts;
9479 if (CONST_INT_P (el))
9481 elpart = INTVAL (el);
9482 parts = 1;
9484 else if (CONST_DOUBLE_P (el))
9486 elpart = CONST_DOUBLE_LOW (el);
9487 parts = 2;
9489 else
9490 gcc_unreachable ();
9492 for (part = 0; part < parts; part++)
9494 unsigned int byte;
9495 for (byte = 0; byte < innersize; byte++)
9497 bytes[idx++] = (elpart & 0xff) ^ invmask;
9498 elpart >>= BITS_PER_UNIT;
9500 if (CONST_DOUBLE_P (el))
9501 elpart = CONST_DOUBLE_HIGH (el);
9505 /* Sanity check. */
9506 gcc_assert (idx == GET_MODE_SIZE (mode));
9510 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9511 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9513 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9514 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9516 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9517 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9519 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9520 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9522 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9524 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9526 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9527 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9529 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9530 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9532 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9533 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9535 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9536 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9538 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9540 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9542 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9543 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9545 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9546 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9548 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9549 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9551 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9552 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9554 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9556 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9557 && bytes[i] == bytes[(i + 8) % idx]);
9559 while (0);
9561 if (immtype == -1)
9562 return -1;
9564 if (elementwidth)
9565 *elementwidth = elsize;
9567 if (modconst)
9569 unsigned HOST_WIDE_INT imm = 0;
9571 /* Un-invert bytes of recognized vector, if necessary. */
9572 if (invmask != 0)
9573 for (i = 0; i < idx; i++)
9574 bytes[i] ^= invmask;
9576 if (immtype == 17)
9578 /* FIXME: Broken on 32-bit H_W_I hosts. */
9579 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9581 for (i = 0; i < 8; i++)
9582 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9583 << (i * BITS_PER_UNIT);
9585 *modconst = GEN_INT (imm);
9587 else
9589 unsigned HOST_WIDE_INT imm = 0;
9591 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9592 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9594 *modconst = GEN_INT (imm);
9598 return immtype;
9599 #undef CHECK
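/* For example, a V4SImode CONST_VECTOR whose elements are all 0x000000ab
   splats to the byte pattern { ab,0,0,0, ab,0,0,0, ... }, which matches
   case 0 above (assuming INVERSE is zero): the return value is 0,
   *ELEMENTWIDTH is set to 32 and *MODCONST to (const_int 0xab), i.e. an
   immediate a 32-bit-element VMOV can encode.  Float vectors are only
   accepted when every element is the same VFP3 constant (return value 18)
   or zero (return value 19), with *ELEMENTWIDTH set to 0.  */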
9602 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9603 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9604 float elements), and a modified constant (whatever should be output for a
9605 VMOV) in *MODCONST. */
9608 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9609 rtx *modconst, int *elementwidth)
9611 rtx tmpconst;
9612 int tmpwidth;
9613 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9615 if (retval == -1)
9616 return 0;
9618 if (modconst)
9619 *modconst = tmpconst;
9621 if (elementwidth)
9622 *elementwidth = tmpwidth;
9624 return 1;
9627 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9628 the immediate is valid, write a constant suitable for using as an operand
9629 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9630 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9633 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9634 rtx *modconst, int *elementwidth)
9636 rtx tmpconst;
9637 int tmpwidth;
9638 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9640 if (retval < 0 || retval > 5)
9641 return 0;
9643 if (modconst)
9644 *modconst = tmpconst;
9646 if (elementwidth)
9647 *elementwidth = tmpwidth;
9649 return 1;
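/* Only return values 0 through 5 from neon_valid_immediate are accepted
   here: the 32-bit forms in which at most one byte position of each
   element is nonzero (cases 0-3) and the corresponding 16-bit forms
   (cases 4 and 5).  For example, a V4SImode vector whose elements are
   all 0x0000ab00 is case 1, so this function succeeds and sets
   *ELEMENTWIDTH to 32.  */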
9652 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9653 the immediate is valid, write a constant suitable for using as an operand
9654 to VSHR/VSHL to *MODCONST and the corresponding element width to
9655 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or right shift,
9656 because the two have different limits on the shift amount. */
9659 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9660 rtx *modconst, int *elementwidth,
9661 bool isleftshift)
9663 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9664 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9665 unsigned HOST_WIDE_INT last_elt = 0;
9666 unsigned HOST_WIDE_INT maxshift;
9668 /* The shift count must be the same in every element of the vector. */
9669 for (i = 0; i < n_elts; i++)
9671 rtx el = CONST_VECTOR_ELT (op, i);
9672 unsigned HOST_WIDE_INT elpart;
9674 if (CONST_INT_P (el))
9675 elpart = INTVAL (el);
9676 else if (CONST_DOUBLE_P (el))
9677 return 0;
9678 else
9679 gcc_unreachable ();
9681 if (i != 0 && elpart != last_elt)
9682 return 0;
9684 last_elt = elpart;
9687 /* Shift less than element size. */
9688 maxshift = innersize * 8;
9690 if (isleftshift)
9692 /* Left shift immediate value can be from 0 to <size>-1. */
9693 if (last_elt >= maxshift)
9694 return 0;
9696 else
9698 /* Right shift immediate value can be from 1 to <size>. */
9699 if (last_elt == 0 || last_elt > maxshift)
9700 return 0;
9703 if (elementwidth)
9704 *elementwidth = innersize * 8;
9706 if (modconst)
9707 *modconst = CONST_VECTOR_ELT (op, 0);
9709 return 1;
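/* For example, a V8QImode vector whose elements are all 3 is a valid
   immediate for either shift direction: *ELEMENTWIDTH becomes 8 and
   *MODCONST (const_int 3).  A vector of all 8s is only valid as a
   right-shift count (1..8); for a left shift the limit is 0..7.  */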
9712 /* Return a string suitable for output of Neon immediate logic operation
9713 MNEM. */
9715 char *
9716 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9717 int inverse, int quad)
9719 int width, is_valid;
9720 static char templ[40];
9722 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9724 gcc_assert (is_valid != 0);
9726 if (quad)
9727 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9728 else
9729 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9731 return templ;
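/* E.g. with MNEM "vorr", a 32-bit element width and QUAD set, the
   returned template is "vorr.i32\t%q0, %2"; note that *OP2 has already
   been replaced with the massaged immediate by the validity check
   above.  */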
9734 /* Return a string suitable for output of Neon immediate shift operation
9735 (VSHR or VSHL) MNEM. */
9737 char *
9738 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9739 enum machine_mode mode, int quad,
9740 bool isleftshift)
9742 int width, is_valid;
9743 static char templ[40];
9745 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9746 gcc_assert (is_valid != 0);
9748 if (quad)
9749 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9750 else
9751 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9753 return templ;
9756 /* Output a sequence of pairwise operations to implement a reduction.
9757 NOTE: We do "too much work" here, because pairwise operations work on two
9758 registers-worth of operands in one go. Unfortunately, I don't think we can
9759 exploit those extra calculations to do the full operation in fewer steps.
9760 Although all vector elements of the result but the first are ignored, we
9761 actually calculate the same result in each of the elements. An alternative
9762 such as initially loading a vector with zero to use as each of the second
9763 operands would use up an additional register and take an extra instruction,
9764 for no particular gain. */
9766 void
9767 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9768 rtx (*reduc) (rtx, rtx, rtx))
9770 enum machine_mode inner = GET_MODE_INNER (mode);
9771 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9772 rtx tmpsum = op1;
9774 for (i = parts / 2; i >= 1; i /= 2)
9776 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9777 emit_insn (reduc (dest, tmpsum, tmpsum));
9778 tmpsum = dest;
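/* For a four-element mode this emits two pairwise operations: with
   elements { a, b, c, d } the first REDUC combines adjacent pairs into a
   scratch register, and the second combines those partial results into
   OP0, so every element of OP0 (in particular element 0, the one callers
   use) ends up holding the full reduction of a, b, c and d.  */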
9782 /* If VALS is a vector constant that can be loaded into a register
9783 using VDUP, generate instructions to do so and return an RTX to
9784 assign to the register. Otherwise return NULL_RTX. */
9786 static rtx
9787 neon_vdup_constant (rtx vals)
9789 enum machine_mode mode = GET_MODE (vals);
9790 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9791 int n_elts = GET_MODE_NUNITS (mode);
9792 bool all_same = true;
9793 rtx x;
9794 int i;
9796 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9797 return NULL_RTX;
9799 for (i = 0; i < n_elts; ++i)
9801 x = XVECEXP (vals, 0, i);
9802 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9803 all_same = false;
9806 if (!all_same)
9807 /* The elements are not all the same. We could handle repeating
9808 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9809 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9810 vdup.i16). */
9811 return NULL_RTX;
9813 /* We can load this constant by using VDUP and a constant in a
9814 single ARM register. This will be cheaper than a vector
9815 load. */
9817 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9818 return gen_rtx_VEC_DUPLICATE (mode, x);
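/* For example, a V8QImode CONST_VECTOR with every element equal to 5
   yields (vec_duplicate:V8QI (reg:QI ...)) after the 5 has been copied
   into a core register; vectors whose inner mode is wider than 4 bytes
   (e.g. DImode elements), or whose elements differ, return NULL_RTX.  */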
9821 /* Generate code to load VALS, which is a PARALLEL containing only
9822 constants (for vec_init) or CONST_VECTOR, efficiently into a
9823 register. Returns an RTX to copy into the register, or NULL_RTX
9824 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9827 neon_make_constant (rtx vals)
9829 enum machine_mode mode = GET_MODE (vals);
9830 rtx target;
9831 rtx const_vec = NULL_RTX;
9832 int n_elts = GET_MODE_NUNITS (mode);
9833 int n_const = 0;
9834 int i;
9836 if (GET_CODE (vals) == CONST_VECTOR)
9837 const_vec = vals;
9838 else if (GET_CODE (vals) == PARALLEL)
9840 /* A CONST_VECTOR must contain only CONST_INTs and
9841 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9842 Only store valid constants in a CONST_VECTOR. */
9843 for (i = 0; i < n_elts; ++i)
9845 rtx x = XVECEXP (vals, 0, i);
9846 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9847 n_const++;
9849 if (n_const == n_elts)
9850 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9852 else
9853 gcc_unreachable ();
9855 if (const_vec != NULL
9856 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9857 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9858 return const_vec;
9859 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9860 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9861 pipeline cycle; creating the constant takes one or two ARM
9862 pipeline cycles. */
9863 return target;
9864 else if (const_vec != NULL_RTX)
9865 /* Load from constant pool. On Cortex-A8 this takes two cycles
9866 (for either double or quad vectors). We cannot take advantage
9867 of single-cycle VLD1 because we need a PC-relative addressing
9868 mode. */
9869 return const_vec;
9870 else
9871 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9872 We cannot construct an initializer. */
9873 return NULL_RTX;
9876 /* Initialize vector TARGET to VALS. */
9878 void
9879 neon_expand_vector_init (rtx target, rtx vals)
9881 enum machine_mode mode = GET_MODE (target);
9882 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9883 int n_elts = GET_MODE_NUNITS (mode);
9884 int n_var = 0, one_var = -1;
9885 bool all_same = true;
9886 rtx x, mem;
9887 int i;
9889 for (i = 0; i < n_elts; ++i)
9891 x = XVECEXP (vals, 0, i);
9892 if (!CONSTANT_P (x))
9893 ++n_var, one_var = i;
9895 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9896 all_same = false;
9899 if (n_var == 0)
9901 rtx constant = neon_make_constant (vals);
9902 if (constant != NULL_RTX)
9904 emit_move_insn (target, constant);
9905 return;
9909 /* Splat a single non-constant element if we can. */
9910 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9912 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9913 emit_insn (gen_rtx_SET (VOIDmode, target,
9914 gen_rtx_VEC_DUPLICATE (mode, x)));
9915 return;
9918 /* One field is non-constant. Load constant then overwrite varying
9919 field. This is more efficient than using the stack. */
9920 if (n_var == 1)
9922 rtx copy = copy_rtx (vals);
9923 rtx index = GEN_INT (one_var);
9925 /* Load constant part of vector, substitute neighboring value for
9926 varying element. */
9927 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9928 neon_expand_vector_init (target, copy);
9930 /* Insert variable. */
9931 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9932 switch (mode)
9934 case V8QImode:
9935 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9936 break;
9937 case V16QImode:
9938 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9939 break;
9940 case V4HImode:
9941 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9942 break;
9943 case V8HImode:
9944 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9945 break;
9946 case V2SImode:
9947 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9948 break;
9949 case V4SImode:
9950 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9951 break;
9952 case V2SFmode:
9953 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9954 break;
9955 case V4SFmode:
9956 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9957 break;
9958 case V2DImode:
9959 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9960 break;
9961 default:
9962 gcc_unreachable ();
9964 return;
9967 /* Construct the vector in memory one field at a time
9968 and load the whole vector. */
9969 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9970 for (i = 0; i < n_elts; i++)
9971 emit_move_insn (adjust_address_nv (mem, inner_mode,
9972 i * GET_MODE_SIZE (inner_mode)),
9973 XVECEXP (vals, 0, i));
9974 emit_move_insn (target, mem);
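/* As an example of the single-variable path: for V4SImode VALS
   { x, 1, 2, 3 } with x not constant, ONE_VAR is 0, the copy becomes the
   constant vector { 1, 1, 2, 3 } (the neighbouring element is
   substituted), that constant is loaded by the recursive call, and a
   vset_lane insn then writes x into lane 0 of TARGET.  */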
9977 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9978 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9979 reported source locations are bogus. */
9981 static void
9982 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9983 const char *err)
9985 HOST_WIDE_INT lane;
9987 gcc_assert (CONST_INT_P (operand));
9989 lane = INTVAL (operand);
9991 if (lane < low || lane >= high)
9992 error (err);
9995 /* Bounds-check lanes. */
9997 void
9998 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
10000 bounds_check (operand, low, high, "lane out of range");
10003 /* Bounds-check constants. */
10005 void
10006 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
10008 bounds_check (operand, low, high, "constant out of range");
10011 HOST_WIDE_INT
10012 neon_element_bits (enum machine_mode mode)
10014 if (mode == DImode)
10015 return GET_MODE_BITSIZE (mode);
10016 else
10017 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
10021 /* Predicates for `match_operand' and `match_operator'. */
10023 /* Return TRUE if OP is a valid coprocessor memory address pattern.
10024 WB is true if full writeback address modes are allowed and is false
10025 if limited writeback address modes (POST_INC and PRE_DEC) are
10026 allowed. */
10029 arm_coproc_mem_operand (rtx op, bool wb)
10031 rtx ind;
10033 /* Reject eliminable registers. */
10034 if (! (reload_in_progress || reload_completed)
10035 && ( reg_mentioned_p (frame_pointer_rtx, op)
10036 || reg_mentioned_p (arg_pointer_rtx, op)
10037 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10038 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10039 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10040 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10041 return FALSE;
10043 /* Constants are converted into offsets from labels. */
10044 if (!MEM_P (op))
10045 return FALSE;
10047 ind = XEXP (op, 0);
10049 if (reload_completed
10050 && (GET_CODE (ind) == LABEL_REF
10051 || (GET_CODE (ind) == CONST
10052 && GET_CODE (XEXP (ind, 0)) == PLUS
10053 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10054 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10055 return TRUE;
10057 /* Match: (mem (reg)). */
10058 if (REG_P (ind))
10059 return arm_address_register_rtx_p (ind, 0);
10061 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
10062 acceptable in any case (subject to verification by
10063 arm_address_register_rtx_p). We need WB to be true to accept
10064 PRE_INC and POST_DEC. */
10065 if (GET_CODE (ind) == POST_INC
10066 || GET_CODE (ind) == PRE_DEC
10067 || (wb
10068 && (GET_CODE (ind) == PRE_INC
10069 || GET_CODE (ind) == POST_DEC)))
10070 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10072 if (wb
10073 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
10074 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
10075 && GET_CODE (XEXP (ind, 1)) == PLUS
10076 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
10077 ind = XEXP (ind, 1);
10079 /* Match:
10080 (plus (reg)
10081 (const)). */
10082 if (GET_CODE (ind) == PLUS
10083 && REG_P (XEXP (ind, 0))
10084 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10085 && CONST_INT_P (XEXP (ind, 1))
10086 && INTVAL (XEXP (ind, 1)) > -1024
10087 && INTVAL (XEXP (ind, 1)) < 1024
10088 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10089 return TRUE;
10091 return FALSE;
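/* To summarise: OP must be a MEM whose address is a plain register, a
   label-relative constant (once reload has completed), POST_INC/PRE_DEC
   (plus, with WB, the remaining auto-modify forms), or
   (plus (reg) (const_int OFF)) with OFF a multiple of 4 in the range
   -1020..+1020, i.e. the offset range of the coprocessor/VFP load-store
   instructions.  For example (mem:DF (plus:SI (reg:SI 4) (const_int 8)))
   is accepted, while an offset of 1024 is rejected.  */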
10094 /* Return TRUE if OP is a memory operand which we can load or store a vector
10095 to/from. TYPE is one of the following values:
10096 0 - Vector load/store (vldr)
10097 1 - Core registers (ldm)
10098 2 - Element/structure loads (vld1)
10101 neon_vector_mem_operand (rtx op, int type)
10103 rtx ind;
10105 /* Reject eliminable registers. */
10106 if (! (reload_in_progress || reload_completed)
10107 && ( reg_mentioned_p (frame_pointer_rtx, op)
10108 || reg_mentioned_p (arg_pointer_rtx, op)
10109 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10110 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10111 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10112 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10113 return FALSE;
10115 /* Constants are converted into offsets from labels. */
10116 if (!MEM_P (op))
10117 return FALSE;
10119 ind = XEXP (op, 0);
10121 if (reload_completed
10122 && (GET_CODE (ind) == LABEL_REF
10123 || (GET_CODE (ind) == CONST
10124 && GET_CODE (XEXP (ind, 0)) == PLUS
10125 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10126 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10127 return TRUE;
10129 /* Match: (mem (reg)). */
10130 if (REG_P (ind))
10131 return arm_address_register_rtx_p (ind, 0);
10133 /* Allow post-increment with Neon registers. */
10134 if ((type != 1 && GET_CODE (ind) == POST_INC)
10135 || (type == 0 && GET_CODE (ind) == PRE_DEC))
10136 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10138 /* FIXME: vld1 allows register post-modify. */
10140 /* Match:
10141 (plus (reg)
10142 (const)). */
10143 if (type == 0
10144 && GET_CODE (ind) == PLUS
10145 && REG_P (XEXP (ind, 0))
10146 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10147 && CONST_INT_P (XEXP (ind, 1))
10148 && INTVAL (XEXP (ind, 1)) > -1024
10149 /* For quad modes, we restrict the constant offset to be slightly less
10150 than what the instruction format permits. We have no such constraint
10151 on double mode offsets. (This must match arm_legitimate_index_p.) */
10152 && (INTVAL (XEXP (ind, 1))
10153 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10154 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10155 return TRUE;
10157 return FALSE;
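/* For instance, with TYPE 0 a double-word access may use offsets up to
   1020, but a quad-word access (e.g. V4SImode) is limited to 1012 by the
   strict "< 1016" bound above, mirroring the corresponding check in
   arm_legitimate_index_p.  */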
10160 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
10161 type. */
10163 neon_struct_mem_operand (rtx op)
10165 rtx ind;
10167 /* Reject eliminable registers. */
10168 if (! (reload_in_progress || reload_completed)
10169 && ( reg_mentioned_p (frame_pointer_rtx, op)
10170 || reg_mentioned_p (arg_pointer_rtx, op)
10171 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10172 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10173 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10174 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10175 return FALSE;
10177 /* Constants are converted into offsets from labels. */
10178 if (!MEM_P (op))
10179 return FALSE;
10181 ind = XEXP (op, 0);
10183 if (reload_completed
10184 && (GET_CODE (ind) == LABEL_REF
10185 || (GET_CODE (ind) == CONST
10186 && GET_CODE (XEXP (ind, 0)) == PLUS
10187 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10188 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10189 return TRUE;
10191 /* Match: (mem (reg)). */
10192 if (REG_P (ind))
10193 return arm_address_register_rtx_p (ind, 0);
10195 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10196 if (GET_CODE (ind) == POST_INC
10197 || GET_CODE (ind) == PRE_DEC)
10198 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10200 return FALSE;
10203 /* Return true if X is a register that will be eliminated later on. */
10205 arm_eliminable_register (rtx x)
10207 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10208 || REGNO (x) == ARG_POINTER_REGNUM
10209 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10210 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10213 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
10214 coprocessor registers. Otherwise return NO_REGS. */
10216 enum reg_class
10217 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10219 if (mode == HFmode)
10221 if (!TARGET_NEON_FP16)
10222 return GENERAL_REGS;
10223 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10224 return NO_REGS;
10225 return GENERAL_REGS;
10228 /* The neon move patterns handle all legitimate vector and struct
10229 addresses. */
10230 if (TARGET_NEON
10231 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10232 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10233 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10234 || VALID_NEON_STRUCT_MODE (mode)))
10235 return NO_REGS;
10237 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10238 return NO_REGS;
10240 return GENERAL_REGS;
10243 /* Values which must be returned in the most-significant end of the return
10244 register. */
10246 static bool
10247 arm_return_in_msb (const_tree valtype)
10249 return (TARGET_AAPCS_BASED
10250 && BYTES_BIG_ENDIAN
10251 && (AGGREGATE_TYPE_P (valtype)
10252 || TREE_CODE (valtype) == COMPLEX_TYPE
10253 || FIXED_POINT_TYPE_P (valtype)));
10256 /* Return TRUE if X references a SYMBOL_REF. */
10258 symbol_mentioned_p (rtx x)
10260 const char * fmt;
10261 int i;
10263 if (GET_CODE (x) == SYMBOL_REF)
10264 return 1;
10266 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10267 are constant offsets, not symbols. */
10268 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10269 return 0;
10271 fmt = GET_RTX_FORMAT (GET_CODE (x));
10273 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10275 if (fmt[i] == 'E')
10277 int j;
10279 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10280 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10281 return 1;
10283 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10284 return 1;
10287 return 0;
10290 /* Return TRUE if X references a LABEL_REF. */
10292 label_mentioned_p (rtx x)
10294 const char * fmt;
10295 int i;
10297 if (GET_CODE (x) == LABEL_REF)
10298 return 1;
10300 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10301 instruction, but they are constant offsets, not symbols. */
10302 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10303 return 0;
10305 fmt = GET_RTX_FORMAT (GET_CODE (x));
10306 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10308 if (fmt[i] == 'E')
10310 int j;
10312 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10313 if (label_mentioned_p (XVECEXP (x, i, j)))
10314 return 1;
10316 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10317 return 1;
10320 return 0;
10324 tls_mentioned_p (rtx x)
10326 switch (GET_CODE (x))
10328 case CONST:
10329 return tls_mentioned_p (XEXP (x, 0));
10331 case UNSPEC:
10332 if (XINT (x, 1) == UNSPEC_TLS)
10333 return 1;
10335 default:
10336 return 0;
10340 /* Must not copy any rtx that uses a pc-relative address. */
10342 static int
10343 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10345 if (GET_CODE (*x) == UNSPEC
10346 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10347 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10348 return 1;
10349 return 0;
10352 static bool
10353 arm_cannot_copy_insn_p (rtx insn)
10355 /* The tls call insn cannot be copied, as it is paired with a data
10356 word. */
10357 if (recog_memoized (insn) == CODE_FOR_tlscall)
10358 return true;
10360 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10363 enum rtx_code
10364 minmax_code (rtx x)
10366 enum rtx_code code = GET_CODE (x);
10368 switch (code)
10370 case SMAX:
10371 return GE;
10372 case SMIN:
10373 return LE;
10374 case UMIN:
10375 return LEU;
10376 case UMAX:
10377 return GEU;
10378 default:
10379 gcc_unreachable ();
10383 /* Match pair of min/max operators that can be implemented via usat/ssat. */
10385 bool
10386 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10387 int *mask, bool *signed_sat)
10389 /* The high bound must be a power of two minus one. */
10390 int log = exact_log2 (INTVAL (hi_bound) + 1);
10391 if (log == -1)
10392 return false;
10394 /* The low bound is either zero (for usat) or one less than the
10395 negation of the high bound (for ssat). */
10396 if (INTVAL (lo_bound) == 0)
10398 if (mask)
10399 *mask = log;
10400 if (signed_sat)
10401 *signed_sat = false;
10403 return true;
10406 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10408 if (mask)
10409 *mask = log + 1;
10410 if (signed_sat)
10411 *signed_sat = true;
10413 return true;
10416 return false;
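/* Worked examples: bounds 0..255 give exact_log2 (256) == 8, so this is
   a usat with *MASK == 8 and *SIGNED_SAT == false; bounds -128..127 give
   exact_log2 (128) == 7 and -128 == -127 - 1, so this is an ssat with
   *MASK == 8 and *SIGNED_SAT == true.  Bounds such as 0..100 fail
   because 101 is not a power of two.  */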
10419 /* Return 1 if memory locations are adjacent. */
10421 adjacent_mem_locations (rtx a, rtx b)
10423 /* We don't guarantee to preserve the order of these memory refs. */
10424 if (volatile_refs_p (a) || volatile_refs_p (b))
10425 return 0;
10427 if ((REG_P (XEXP (a, 0))
10428 || (GET_CODE (XEXP (a, 0)) == PLUS
10429 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10430 && (REG_P (XEXP (b, 0))
10431 || (GET_CODE (XEXP (b, 0)) == PLUS
10432 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10434 HOST_WIDE_INT val0 = 0, val1 = 0;
10435 rtx reg0, reg1;
10436 int val_diff;
10438 if (GET_CODE (XEXP (a, 0)) == PLUS)
10440 reg0 = XEXP (XEXP (a, 0), 0);
10441 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10443 else
10444 reg0 = XEXP (a, 0);
10446 if (GET_CODE (XEXP (b, 0)) == PLUS)
10448 reg1 = XEXP (XEXP (b, 0), 0);
10449 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10451 else
10452 reg1 = XEXP (b, 0);
10454 /* Don't accept any offset that will require multiple
10455 instructions to handle, since this would cause the
10456 arith_adjacentmem pattern to output an overlong sequence. */
10457 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10458 return 0;
10460 /* Don't allow an eliminable register: register elimination can make
10461 the offset too large. */
10462 if (arm_eliminable_register (reg0))
10463 return 0;
10465 val_diff = val1 - val0;
10467 if (arm_ld_sched)
10469 /* If the target has load delay slots, then there's no benefit
10470 to using an ldm instruction unless the offset is zero and
10471 we are optimizing for size. */
10472 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10473 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10474 && (val_diff == 4 || val_diff == -4));
10477 return ((REGNO (reg0) == REGNO (reg1))
10478 && (val_diff == 4 || val_diff == -4));
10481 return 0;
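/* For example, (mem (reg r4)) and (mem (plus (reg r4) (const_int 4)))
   are adjacent (same base, offsets 0 and 4).  On arm_ld_sched cores the
   pair is only reported as adjacent when optimizing for size and one of
   the two offsets is 0 or 4, for the reasons given above.  */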
10484 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10485 for load operations, false for store operations. CONSECUTIVE is true
10486 if the register numbers in the operation must be consecutive in the register
10487 bank. RETURN_PC is true if value is to be loaded in PC.
10488 The pattern we are trying to match for load is:
10489 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10490 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10493 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10495 where
10496 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10497 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10498 3. If consecutive is TRUE, then for kth register being loaded,
10499 REGNO (R_dk) = REGNO (R_d0) + k.
10500 The pattern for store is similar. */
10501 bool
10502 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10503 bool consecutive, bool return_pc)
10505 HOST_WIDE_INT count = XVECLEN (op, 0);
10506 rtx reg, mem, addr;
10507 unsigned regno;
10508 unsigned first_regno;
10509 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10510 rtx elt;
10511 bool addr_reg_in_reglist = false;
10512 bool update = false;
10513 int reg_increment;
10514 int offset_adj;
10515 int regs_per_val;
10517 /* If not in SImode, then registers must be consecutive
10518 (e.g., VLDM instructions for DFmode). */
10519 gcc_assert ((mode == SImode) || consecutive);
10520 /* Setting return_pc for stores is illegal. */
10521 gcc_assert (!return_pc || load);
10523 /* Set up the increments and the regs per val based on the mode. */
10524 reg_increment = GET_MODE_SIZE (mode);
10525 regs_per_val = reg_increment / 4;
10526 offset_adj = return_pc ? 1 : 0;
10528 if (count <= 1
10529 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10530 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10531 return false;
10533 /* Check if this is a write-back. */
10534 elt = XVECEXP (op, 0, offset_adj);
10535 if (GET_CODE (SET_SRC (elt)) == PLUS)
10537 i++;
10538 base = 1;
10539 update = true;
10541 /* The offset adjustment must be the number of registers being
10542 popped times the size of a single register. */
10543 if (!REG_P (SET_DEST (elt))
10544 || !REG_P (XEXP (SET_SRC (elt), 0))
10545 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10546 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10547 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10548 ((count - 1 - offset_adj) * reg_increment))
10549 return false;
10552 i = i + offset_adj;
10553 base = base + offset_adj;
10554 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10555 success depends on the type: VLDM can do just one reg,
10556 LDM must do at least two. */
10557 if ((count <= i) && (mode == SImode))
10558 return false;
10560 elt = XVECEXP (op, 0, i - 1);
10561 if (GET_CODE (elt) != SET)
10562 return false;
10564 if (load)
10566 reg = SET_DEST (elt);
10567 mem = SET_SRC (elt);
10569 else
10571 reg = SET_SRC (elt);
10572 mem = SET_DEST (elt);
10575 if (!REG_P (reg) || !MEM_P (mem))
10576 return false;
10578 regno = REGNO (reg);
10579 first_regno = regno;
10580 addr = XEXP (mem, 0);
10581 if (GET_CODE (addr) == PLUS)
10583 if (!CONST_INT_P (XEXP (addr, 1)))
10584 return false;
10586 offset = INTVAL (XEXP (addr, 1));
10587 addr = XEXP (addr, 0);
10590 if (!REG_P (addr))
10591 return false;
10593 /* Don't allow SP to be loaded unless it is also the base register. It
10594 guarantees that SP is reset correctly when an LDM instruction
10595 is interrupted. Otherwise, we might end up with a corrupt stack. */
10596 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10597 return false;
10599 for (; i < count; i++)
10601 elt = XVECEXP (op, 0, i);
10602 if (GET_CODE (elt) != SET)
10603 return false;
10605 if (load)
10607 reg = SET_DEST (elt);
10608 mem = SET_SRC (elt);
10610 else
10612 reg = SET_SRC (elt);
10613 mem = SET_DEST (elt);
10616 if (!REG_P (reg)
10617 || GET_MODE (reg) != mode
10618 || REGNO (reg) <= regno
10619 || (consecutive
10620 && (REGNO (reg) !=
10621 (unsigned int) (first_regno + regs_per_val * (i - base))))
10622 /* Don't allow SP to be loaded unless it is also the base register. It
10623 guarantees that SP is reset correctly when an LDM instruction
10624 is interrupted. Otherwise, we might end up with a corrupt stack. */
10625 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10626 || !MEM_P (mem)
10627 || GET_MODE (mem) != mode
10628 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10629 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10630 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10631 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10632 offset + (i - base) * reg_increment))
10633 && (!REG_P (XEXP (mem, 0))
10634 || offset + (i - base) * reg_increment != 0)))
10635 return false;
10637 regno = REGNO (reg);
10638 if (regno == REGNO (addr))
10639 addr_reg_in_reglist = true;
10642 if (load)
10644 if (update && addr_reg_in_reglist)
10645 return false;
10647 /* For Thumb-1, the address register is always modified, either by write-back
10648 or by explicit load. If the pattern does not describe an update,
10649 then the address register must be in the list of loaded registers. */
10650 if (TARGET_THUMB1)
10651 return update || addr_reg_in_reglist;
10654 return true;
10657 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10658 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10659 instruction. ADD_OFFSET is nonzero if the base address register needs
10660 to be modified with an add instruction before we can use it. */
10662 static bool
10663 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10664 int nops, HOST_WIDE_INT add_offset)
10666 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10667 if the offset isn't small enough. The reason 2 ldrs are faster
10668 is because these ARMs are able to do more than one cache access
10669 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10670 whilst the ARM8 has a double bandwidth cache. This means that
10671 these cores can do both an instruction fetch and a data fetch in
10672 a single cycle, so the trick of calculating the address into a
10673 scratch register (one of the result regs) and then doing a load
10674 multiple actually becomes slower (and no smaller in code size).
10675 That is the transformation
10677 ldr rd1, [rbase + offset]
10678 ldr rd2, [rbase + offset + 4]
10680 to
10682 add rd1, rbase, offset
10683 ldmia rd1, {rd1, rd2}
10685 produces worse code -- '3 cycles + any stalls on rd2' instead of
10686 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10687 access per cycle, the first sequence could never complete in less
10688 than 6 cycles, whereas the ldm sequence would only take 5 and
10689 would make better use of sequential accesses if not hitting the
10690 cache.
10692 We cheat here and test 'arm_ld_sched' which we currently know to
10693 only be true for the ARM8, ARM9 and StrongARM. If this ever
10694 changes, then the test below needs to be reworked. */
10695 if (nops == 2 && arm_ld_sched && add_offset != 0)
10696 return false;
10698 /* XScale has load-store double instructions, but they have stricter
10699 alignment requirements than load-store multiple, so we cannot
10700 use them.
10702 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10703 the pipeline until completion.
10705 NREGS CYCLES
10706 1 3
10707 2 4
10708 3 5
10709 4 6
10711 An ldr instruction takes 1-3 cycles, but does not block the
10712 pipeline.
10714 NREGS CYCLES
10715 1 1-3
10716 2 2-6
10717 3 3-9
10718 4 4-12
10720 Best case ldr will always win. However, the more ldr instructions
10721 we issue, the less likely we are to be able to schedule them well.
10722 Using ldr instructions also increases code size.
10724 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10725 for counts of 3 or 4 regs. */
10726 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10727 return false;
10728 return true;
10731 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10732 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10733 an array ORDER which describes the sequence to use when accessing the
10734 offsets that produces an ascending order. In this sequence, each
10735 offset must be larger by exactly 4 than the previous one. ORDER[0]
10736 must have been filled in with the lowest offset by the caller.
10737 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10738 we use to verify that ORDER produces an ascending order of registers.
10739 Return true if it was possible to construct such an order, false if
10740 not. */
10742 static bool
10743 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10744 int *unsorted_regs)
10746 int i;
10747 for (i = 1; i < nops; i++)
10749 int j;
10751 order[i] = order[i - 1];
10752 for (j = 0; j < nops; j++)
10753 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10755 /* We must find exactly one offset that is higher than the
10756 previous one by 4. */
10757 if (order[i] != order[i - 1])
10758 return false;
10759 order[i] = j;
10761 if (order[i] == order[i - 1])
10762 return false;
10763 /* The register numbers must be ascending. */
10764 if (unsorted_regs != NULL
10765 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10766 return false;
10768 return true;
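/* Worked example: with UNSORTED_OFFSETS = { 8, 0, 4, 12 } the caller
   sets ORDER[0] = 1 (offset 0); the loop then finds offsets 4, 8 and 12
   in turn, giving ORDER = { 1, 2, 0, 3 }.  If any step of 4 is missing,
   or matched by more than one offset, the function returns false.  */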
10771 /* Used to determine in a peephole whether a sequence of load
10772 instructions can be changed into a load-multiple instruction.
10773 NOPS is the number of separate load instructions we are examining. The
10774 first NOPS entries in OPERANDS are the destination registers, the
10775 next NOPS entries are memory operands. If this function is
10776 successful, *BASE is set to the common base register of the memory
10777 accesses; *LOAD_OFFSET is set to the first memory location's offset
10778 from that base register.
10779 REGS is an array filled in with the destination register numbers.
10780 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10781 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10782 the sequence of registers in REGS matches the loads from ascending memory
10783 locations, and the function verifies that the register numbers are
10784 themselves ascending. If CHECK_REGS is false, the register numbers
10785 are stored in the order they are found in the operands. */
10786 static int
10787 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10788 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10790 int unsorted_regs[MAX_LDM_STM_OPS];
10791 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10792 int order[MAX_LDM_STM_OPS];
10793 rtx base_reg_rtx = NULL;
10794 int base_reg = -1;
10795 int i, ldm_case;
10797 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10798 easily extended if required. */
10799 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10801 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10803 /* Loop over the operands and check that the memory references are
10804 suitable (i.e. immediate offsets from the same base register). At
10805 the same time, extract the target register, and the memory
10806 offsets. */
10807 for (i = 0; i < nops; i++)
10809 rtx reg;
10810 rtx offset;
10812 /* Convert a subreg of a mem into the mem itself. */
10813 if (GET_CODE (operands[nops + i]) == SUBREG)
10814 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10816 gcc_assert (MEM_P (operands[nops + i]));
10818 /* Don't reorder volatile memory references; it doesn't seem worth
10819 looking for the case where the order is ok anyway. */
10820 if (MEM_VOLATILE_P (operands[nops + i]))
10821 return 0;
10823 offset = const0_rtx;
10825 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10826 || (GET_CODE (reg) == SUBREG
10827 && REG_P (reg = SUBREG_REG (reg))))
10828 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10829 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10830 || (GET_CODE (reg) == SUBREG
10831 && REG_P (reg = SUBREG_REG (reg))))
10832 && (CONST_INT_P (offset
10833 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10835 if (i == 0)
10837 base_reg = REGNO (reg);
10838 base_reg_rtx = reg;
10839 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10840 return 0;
10842 else if (base_reg != (int) REGNO (reg))
10843 /* Not addressed from the same base register. */
10844 return 0;
10846 unsorted_regs[i] = (REG_P (operands[i])
10847 ? REGNO (operands[i])
10848 : REGNO (SUBREG_REG (operands[i])));
10850 /* If it isn't an integer register, or if it overwrites the
10851 base register but isn't the last insn in the list, then
10852 we can't do this. */
10853 if (unsorted_regs[i] < 0
10854 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10855 || unsorted_regs[i] > 14
10856 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10857 return 0;
10859 /* Don't allow SP to be loaded unless it is also the base
10860 register. It guarantees that SP is reset correctly when
10861 an LDM instruction is interrupted. Otherwise, we might
10862 end up with a corrupt stack. */
10863 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10864 return 0;
10866 unsorted_offsets[i] = INTVAL (offset);
10867 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10868 order[0] = i;
10870 else
10871 /* Not a suitable memory address. */
10872 return 0;
10875 /* All the useful information has now been extracted from the
10876 operands into unsorted_regs and unsorted_offsets; additionally,
10877 order[0] has been set to the lowest offset in the list. Sort
10878 the offsets into order, verifying that they are adjacent, and
10879 check that the register numbers are ascending. */
10880 if (!compute_offset_order (nops, unsorted_offsets, order,
10881 check_regs ? unsorted_regs : NULL))
10882 return 0;
10884 if (saved_order)
10885 memcpy (saved_order, order, sizeof order);
10887 if (base)
10889 *base = base_reg;
10891 for (i = 0; i < nops; i++)
10892 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10894 *load_offset = unsorted_offsets[order[0]];
10897 if (TARGET_THUMB1
10898 && !peep2_reg_dead_p (nops, base_reg_rtx))
10899 return 0;
10901 if (unsorted_offsets[order[0]] == 0)
10902 ldm_case = 1; /* ldmia */
10903 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10904 ldm_case = 2; /* ldmib */
10905 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10906 ldm_case = 3; /* ldmda */
10907 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10908 ldm_case = 4; /* ldmdb */
10909 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10910 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10911 ldm_case = 5;
10912 else
10913 return 0;
10915 if (!multiple_operation_profitable_p (false, nops,
10916 ldm_case == 5
10917 ? unsorted_offsets[order[0]] : 0))
10918 return 0;
10920 return ldm_case;
10923 /* Used to determine in a peephole whether a sequence of store instructions can
10924 be changed into a store-multiple instruction.
10925 NOPS is the number of separate store instructions we are examining.
10926 NOPS_TOTAL is the total number of instructions recognized by the peephole
10927 pattern.
10928 The first NOPS entries in OPERANDS are the source registers, the next
10929 NOPS entries are memory operands. If this function is successful, *BASE is
10930 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10931 to the first memory location's offset from that base register. REGS is an
10932 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10933 likewise filled with the corresponding rtx's.
10934 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10935 numbers to an ascending order of stores.
10936 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10937 from ascending memory locations, and the function verifies that the register
10938 numbers are themselves ascending. If CHECK_REGS is false, the register
10939 numbers are stored in the order they are found in the operands. */
10940 static int
10941 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10942 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10943 HOST_WIDE_INT *load_offset, bool check_regs)
10945 int unsorted_regs[MAX_LDM_STM_OPS];
10946 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10947 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10948 int order[MAX_LDM_STM_OPS];
10949 int base_reg = -1;
10950 rtx base_reg_rtx = NULL;
10951 int i, stm_case;
10953 /* Write-back of the base register is currently only supported for Thumb-1. */
10954 int base_writeback = TARGET_THUMB1;
10956 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10957 easily extended if required. */
10958 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10960 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10962 /* Loop over the operands and check that the memory references are
10963 suitable (i.e. immediate offsets from the same base register). At
10964 the same time, extract the target register, and the memory
10965 offsets. */
10966 for (i = 0; i < nops; i++)
10968 rtx reg;
10969 rtx offset;
10971 /* Convert a subreg of a mem into the mem itself. */
10972 if (GET_CODE (operands[nops + i]) == SUBREG)
10973 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10975 gcc_assert (MEM_P (operands[nops + i]));
10977 /* Don't reorder volatile memory references; it doesn't seem worth
10978 looking for the case where the order is ok anyway. */
10979 if (MEM_VOLATILE_P (operands[nops + i]))
10980 return 0;
10982 offset = const0_rtx;
10984 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10985 || (GET_CODE (reg) == SUBREG
10986 && REG_P (reg = SUBREG_REG (reg))))
10987 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10988 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10989 || (GET_CODE (reg) == SUBREG
10990 && REG_P (reg = SUBREG_REG (reg))))
10991 && (CONST_INT_P (offset
10992 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10994 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10995 ? operands[i] : SUBREG_REG (operands[i]));
10996 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10998 if (i == 0)
11000 base_reg = REGNO (reg);
11001 base_reg_rtx = reg;
11002 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
11003 return 0;
11005 else if (base_reg != (int) REGNO (reg))
11006 /* Not addressed from the same base register. */
11007 return 0;
11009 /* If it isn't an integer register, then we can't do this. */
11010 if (unsorted_regs[i] < 0
11011 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
11012 /* The effects are unpredictable if the base register is
11013 both updated and stored. */
11014 || (base_writeback && unsorted_regs[i] == base_reg)
11015 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
11016 || unsorted_regs[i] > 14)
11017 return 0;
11019 unsorted_offsets[i] = INTVAL (offset);
11020 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
11021 order[0] = i;
11023 else
11024 /* Not a suitable memory address. */
11025 return 0;
11028 /* All the useful information has now been extracted from the
11029 operands into unsorted_regs and unsorted_offsets; additionally,
11030 order[0] has been set to the lowest offset in the list. Sort
11031 the offsets into order, verifying that they are adjacent, and
11032 check that the register numbers are ascending. */
11033 if (!compute_offset_order (nops, unsorted_offsets, order,
11034 check_regs ? unsorted_regs : NULL))
11035 return 0;
11037 if (saved_order)
11038 memcpy (saved_order, order, sizeof order);
11040 if (base)
11042 *base = base_reg;
11044 for (i = 0; i < nops; i++)
11046 regs[i] = unsorted_regs[check_regs ? order[i] : i];
11047 if (reg_rtxs)
11048 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
11051 *load_offset = unsorted_offsets[order[0]];
11054 if (TARGET_THUMB1
11055 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
11056 return 0;
11058 if (unsorted_offsets[order[0]] == 0)
11059 stm_case = 1; /* stmia */
11060 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
11061 stm_case = 2; /* stmib */
11062 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
11063 stm_case = 3; /* stmda */
11064 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
11065 stm_case = 4; /* stmdb */
11066 else
11067 return 0;
11069 if (!multiple_operation_profitable_p (false, nops, 0))
11070 return 0;
11072 return stm_case;
11075 /* Routines for use in generating RTL. */
11077 /* Generate a load-multiple instruction. COUNT is the number of loads in
11078 the instruction; REGS and MEMS are arrays containing the operands.
11079 BASEREG is the base register to be used in addressing the memory operands.
11080 WBACK_OFFSET is nonzero if the instruction should update the base
11081 register. */
11083 static rtx
11084 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11085 HOST_WIDE_INT wback_offset)
11087 int i = 0, j;
11088 rtx result;
11090 if (!multiple_operation_profitable_p (false, count, 0))
11092 rtx seq;
11094 start_sequence ();
11096 for (i = 0; i < count; i++)
11097 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
11099 if (wback_offset != 0)
11100 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11102 seq = get_insns ();
11103 end_sequence ();
11105 return seq;
11108 result = gen_rtx_PARALLEL (VOIDmode,
11109 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11110 if (wback_offset != 0)
11112 XVECEXP (result, 0, 0)
11113 = gen_rtx_SET (VOIDmode, basereg,
11114 plus_constant (Pmode, basereg, wback_offset));
11115 i = 1;
11116 count++;
11119 for (j = 0; i < count; i++, j++)
11120 XVECEXP (result, 0, i)
11121 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
11123 return result;
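/* For example, with COUNT == 3, REGS == { 0, 1, 2 } and WBACK_OFFSET ==
   12 this builds a four-element PARALLEL: the base-register update
   followed by three (set (reg rN) (mem ...)) elements, i.e. the shape
   that ldm_stm_operation_p checks for.  When a multiple operation would
   not be profitable it instead returns a plain sequence of single
   loads.  */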
11126 /* Generate a store-multiple instruction. COUNT is the number of stores in
11127 the instruction; REGS and MEMS are arrays containing the operands.
11128 BASEREG is the base register to be used in addressing the memory operands.
11129 WBACK_OFFSET is nonzero if the instruction should update the base
11130 register. */
11132 static rtx
11133 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11134 HOST_WIDE_INT wback_offset)
11136 int i = 0, j;
11137 rtx result;
11139 if (GET_CODE (basereg) == PLUS)
11140 basereg = XEXP (basereg, 0);
11142 if (!multiple_operation_profitable_p (false, count, 0))
11144 rtx seq;
11146 start_sequence ();
11148 for (i = 0; i < count; i++)
11149 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11151 if (wback_offset != 0)
11152 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11154 seq = get_insns ();
11155 end_sequence ();
11157 return seq;
11160 result = gen_rtx_PARALLEL (VOIDmode,
11161 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11162 if (wback_offset != 0)
11164 XVECEXP (result, 0, 0)
11165 = gen_rtx_SET (VOIDmode, basereg,
11166 plus_constant (Pmode, basereg, wback_offset));
11167 i = 1;
11168 count++;
11171 for (j = 0; i < count; i++, j++)
11172 XVECEXP (result, 0, i)
11173 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11175 return result;
11178 /* Generate either a load-multiple or a store-multiple instruction. This
11179 function can be used in situations where we can start with a single MEM
11180 rtx and adjust its address upwards.
11181 COUNT is the number of operations in the instruction, not counting a
11182 possible update of the base register. REGS is an array containing the
11183 register operands.
11184 BASEREG is the base register to be used in addressing the memory operands,
11185 which are constructed from BASEMEM.
11186 WRITE_BACK specifies whether the generated instruction should include an
11187 update of the base register.
11188 OFFSETP is used to pass an offset to and from this function; this offset
11189 is not used when constructing the address (instead BASEMEM should have an
11190 appropriate offset in its address), it is used only for setting
11191 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
11193 static rtx
11194 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11195 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11197 rtx mems[MAX_LDM_STM_OPS];
11198 HOST_WIDE_INT offset = *offsetp;
11199 int i;
11201 gcc_assert (count <= MAX_LDM_STM_OPS);
11203 if (GET_CODE (basereg) == PLUS)
11204 basereg = XEXP (basereg, 0);
11206 for (i = 0; i < count; i++)
11208 rtx addr = plus_constant (Pmode, basereg, i * 4);
11209 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11210 offset += 4;
11213 if (write_back)
11214 *offsetp = offset;
11216 if (is_load)
11217 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11218 write_back ? 4 * count : 0);
11219 else
11220 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11221 write_back ? 4 * count : 0);
11225 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11226 rtx basemem, HOST_WIDE_INT *offsetp)
11228 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11229 offsetp);
11233 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11234 rtx basemem, HOST_WIDE_INT *offsetp)
11236 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11237 offsetp);
11240 /* Called from a peephole2 expander to turn a sequence of loads into an
11241 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11242 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
11243 is true if we can reorder the registers because they are used commutatively
11244 subsequently.
11245 Returns true iff we could generate a new instruction. */
11247 bool
11248 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11250 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11251 rtx mems[MAX_LDM_STM_OPS];
11252 int i, j, base_reg;
11253 rtx base_reg_rtx;
11254 HOST_WIDE_INT offset;
11255 int write_back = FALSE;
11256 int ldm_case;
11257 rtx addr;
11259 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11260 &base_reg, &offset, !sort_regs);
11262 if (ldm_case == 0)
11263 return false;
11265 if (sort_regs)
11266 for (i = 0; i < nops - 1; i++)
11267 for (j = i + 1; j < nops; j++)
11268 if (regs[i] > regs[j])
11270 int t = regs[i];
11271 regs[i] = regs[j];
11272 regs[j] = t;
11274 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11276 if (TARGET_THUMB1)
11278 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11279 gcc_assert (ldm_case == 1 || ldm_case == 5);
11280 write_back = TRUE;
11283 if (ldm_case == 5)
11285 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11286 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11287 offset = 0;
11288 if (!TARGET_THUMB1)
11290 base_reg = regs[0];
11291 base_reg_rtx = newbase;
11295 for (i = 0; i < nops; i++)
11297 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11298 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11299 SImode, addr, 0);
11301 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11302 write_back ? offset + i * 4 : 0));
11303 return true;
11306 /* Called from a peephole2 expander to turn a sequence of stores into an
11307 STM instruction. OPERANDS are the operands found by the peephole matcher;
11308 NOPS indicates how many separate stores we are trying to combine.
11309 Returns true iff we could generate a new instruction. */
11311 bool
11312 gen_stm_seq (rtx *operands, int nops)
11314 int i;
11315 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11316 rtx mems[MAX_LDM_STM_OPS];
11317 int base_reg;
11318 rtx base_reg_rtx;
11319 HOST_WIDE_INT offset;
11320 int write_back = FALSE;
11321 int stm_case;
11322 rtx addr;
11323 bool base_reg_dies;
11325 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11326 mem_order, &base_reg, &offset, true);
11328 if (stm_case == 0)
11329 return false;
11331 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11333 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11334 if (TARGET_THUMB1)
11336 gcc_assert (base_reg_dies);
11337 write_back = TRUE;
11340 if (stm_case == 5)
11342 gcc_assert (base_reg_dies);
11343 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11344 offset = 0;
11347 addr = plus_constant (Pmode, base_reg_rtx, offset);
11349 for (i = 0; i < nops; i++)
11351 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11352 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11353 SImode, addr, 0);
11355 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11356 write_back ? offset + i * 4 : 0));
11357 return true;
11360 /* Called from a peephole2 expander to turn a sequence of stores that are
11361 preceded by constant loads into an STM instruction. OPERANDS are the
11362 operands found by the peephole matcher; NOPS indicates how many
11363 separate stores we are trying to combine; there are 2 * NOPS
11364 instructions in the peephole.
11365 Returns true iff we could generate a new instruction. */
11367 bool
11368 gen_const_stm_seq (rtx *operands, int nops)
11370 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11371 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11372 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11373 rtx mems[MAX_LDM_STM_OPS];
11374 int base_reg;
11375 rtx base_reg_rtx;
11376 HOST_WIDE_INT offset;
11377 int write_back = FALSE;
11378 int stm_case;
11379 rtx addr;
11380 bool base_reg_dies;
11381 int i, j;
11382 HARD_REG_SET allocated;
11384 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11385 mem_order, &base_reg, &offset, false);
11387 if (stm_case == 0)
11388 return false;
11390 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
11392 /* If the same register is used more than once, try to find a free
11393 register. */
11394 CLEAR_HARD_REG_SET (allocated);
11395 for (i = 0; i < nops; i++)
11397 for (j = i + 1; j < nops; j++)
11398 if (regs[i] == regs[j])
11400 rtx t = peep2_find_free_register (0, nops * 2,
11401 TARGET_THUMB1 ? "l" : "r",
11402 SImode, &allocated);
11403 if (t == NULL_RTX)
11404 return false;
11405 reg_rtxs[i] = t;
11406 regs[i] = REGNO (t);
11410 /* Compute an ordering that maps the register numbers to an ascending
11411 sequence. */
11412 reg_order[0] = 0;
11413 for (i = 0; i < nops; i++)
11414 if (regs[i] < regs[reg_order[0]])
11415 reg_order[0] = i;
11417 for (i = 1; i < nops; i++)
11419 int this_order = reg_order[i - 1];
11420 for (j = 0; j < nops; j++)
11421 if (regs[j] > regs[reg_order[i - 1]]
11422 && (this_order == reg_order[i - 1]
11423 || regs[j] < regs[this_order]))
11424 this_order = j;
11425 reg_order[i] = this_order;
11428 /* Ensure that registers that must be live after the instruction end
11429 up with the correct value. */
11430 for (i = 0; i < nops; i++)
11432 int this_order = reg_order[i];
11433 if ((this_order != mem_order[i]
11434 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11435 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11436 return false;
11439 /* Load the constants. */
11440 for (i = 0; i < nops; i++)
11442 rtx op = operands[2 * nops + mem_order[i]];
11443 sorted_regs[i] = regs[reg_order[i]];
11444 emit_move_insn (reg_rtxs[reg_order[i]], op);
11447 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11449 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11450 if (TARGET_THUMB1)
11452 gcc_assert (base_reg_dies);
11453 write_back = TRUE;
11456 if (stm_case == 5)
11458 gcc_assert (base_reg_dies);
11459 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11460 offset = 0;
11463 addr = plus_constant (Pmode, base_reg_rtx, offset);
11465 for (i = 0; i < nops; i++)
11467 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11468 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11469 SImode, addr, 0);
11471 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11472 write_back ? offset + i * 4 : 0));
11473 return true;
11476 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11477 unaligned copies on processors which support unaligned semantics for those
11478 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11479 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11480 An interleave factor of 1 (the minimum) will perform no interleaving.
11481 Load/store multiple are used for aligned addresses where possible. */
11483 static void
11484 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11485 HOST_WIDE_INT length,
11486 unsigned int interleave_factor)
11488 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11489 int *regnos = XALLOCAVEC (int, interleave_factor);
11490 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11491 HOST_WIDE_INT i, j;
11492 HOST_WIDE_INT remaining = length, words;
11493 rtx halfword_tmp = NULL, byte_tmp = NULL;
11494 rtx dst, src;
11495 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11496 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11497 HOST_WIDE_INT srcoffset, dstoffset;
11498 HOST_WIDE_INT src_autoinc, dst_autoinc;
11499 rtx mem, addr;
11501 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11503 /* Use hard registers if we have aligned source or destination so we can use
11504 load/store multiple with contiguous registers. */
11505 if (dst_aligned || src_aligned)
11506 for (i = 0; i < interleave_factor; i++)
11507 regs[i] = gen_rtx_REG (SImode, i);
11508 else
11509 for (i = 0; i < interleave_factor; i++)
11510 regs[i] = gen_reg_rtx (SImode);
11512 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11513 src = copy_addr_to_reg (XEXP (srcbase, 0));
11515 srcoffset = dstoffset = 0;
11517 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11518 For copying the last bytes we want to subtract this offset again. */
11519 src_autoinc = dst_autoinc = 0;
11521 for (i = 0; i < interleave_factor; i++)
11522 regnos[i] = i;
11524 /* Copy BLOCK_SIZE_BYTES chunks. */
11526 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11528 /* Load words. */
11529 if (src_aligned && interleave_factor > 1)
11531 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11532 TRUE, srcbase, &srcoffset));
11533 src_autoinc += UNITS_PER_WORD * interleave_factor;
11535 else
11537 for (j = 0; j < interleave_factor; j++)
11539 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11540 - src_autoinc));
11541 mem = adjust_automodify_address (srcbase, SImode, addr,
11542 srcoffset + j * UNITS_PER_WORD);
11543 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11545 srcoffset += block_size_bytes;
11548 /* Store words. */
11549 if (dst_aligned && interleave_factor > 1)
11551 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11552 TRUE, dstbase, &dstoffset));
11553 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11555 else
11557 for (j = 0; j < interleave_factor; j++)
11559 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11560 - dst_autoinc));
11561 mem = adjust_automodify_address (dstbase, SImode, addr,
11562 dstoffset + j * UNITS_PER_WORD);
11563 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11565 dstoffset += block_size_bytes;
11568 remaining -= block_size_bytes;
11571 /* Copy any whole words left (note these aren't interleaved with any
11572 subsequent halfword/byte load/stores in the interests of simplicity). */
11574 words = remaining / UNITS_PER_WORD;
11576 gcc_assert (words < interleave_factor);
11578 if (src_aligned && words > 1)
11580 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11581 &srcoffset));
11582 src_autoinc += UNITS_PER_WORD * words;
11584 else
11586 for (j = 0; j < words; j++)
11588 addr = plus_constant (Pmode, src,
11589 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11590 mem = adjust_automodify_address (srcbase, SImode, addr,
11591 srcoffset + j * UNITS_PER_WORD);
11592 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11594 srcoffset += words * UNITS_PER_WORD;
11597 if (dst_aligned && words > 1)
11599 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11600 &dstoffset));
11601 dst_autoinc += words * UNITS_PER_WORD;
11603 else
11605 for (j = 0; j < words; j++)
11607 addr = plus_constant (Pmode, dst,
11608 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11609 mem = adjust_automodify_address (dstbase, SImode, addr,
11610 dstoffset + j * UNITS_PER_WORD);
11611 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11613 dstoffset += words * UNITS_PER_WORD;
11616 remaining -= words * UNITS_PER_WORD;
11618 gcc_assert (remaining < 4);
11620 /* Copy a halfword if necessary. */
11622 if (remaining >= 2)
11624 halfword_tmp = gen_reg_rtx (SImode);
11626 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11627 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11628 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11630 /* Either write out immediately, or delay until we've loaded the last
11631 byte, depending on interleave factor. */
11632 if (interleave_factor == 1)
11634 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11635 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11636 emit_insn (gen_unaligned_storehi (mem,
11637 gen_lowpart (HImode, halfword_tmp)));
11638 halfword_tmp = NULL;
11639 dstoffset += 2;
11642 remaining -= 2;
11643 srcoffset += 2;
11646 gcc_assert (remaining < 2);
11648 /* Copy last byte. */
11650 if ((remaining & 1) != 0)
11652 byte_tmp = gen_reg_rtx (SImode);
11654 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11655 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11656 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11658 if (interleave_factor == 1)
11660 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11661 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11662 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11663 byte_tmp = NULL;
11664 dstoffset++;
11667 remaining--;
11668 srcoffset++;
11671 /* Store last halfword if we haven't done so already. */
11673 if (halfword_tmp)
11675 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11676 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11677 emit_insn (gen_unaligned_storehi (mem,
11678 gen_lowpart (HImode, halfword_tmp)));
11679 dstoffset += 2;
11682 /* Likewise for last byte. */
11684 if (byte_tmp)
11686 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11687 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11688 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11689 dstoffset++;
11692 gcc_assert (remaining == 0 && srcoffset == dstoffset);
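/* Illustrative sketch only -- it is not part of the compiler, is guarded out,
   and the function name is made up.  It shows, in plain C, roughly the copy
   strategy implemented above for an interleave factor of 2, with
   __builtin_memcpy standing in for the unaligned word accesses.  */
#if 0
static void
block_copy_interleave_2 (unsigned char *dst, const unsigned char *src,
                         unsigned long len)
{
  unsigned int w0, w1;

  /* Main loop: two loads followed by two stores, so the second load can
     issue while the first is still in flight.  */
  while (len >= 8)
    {
      __builtin_memcpy (&w0, src, 4);
      __builtin_memcpy (&w1, src + 4, 4);
      __builtin_memcpy (dst, &w0, 4);
      __builtin_memcpy (dst + 4, &w1, 4);
      src += 8;
      dst += 8;
      len -= 8;
    }

  /* Tail: at most one whole word, then a halfword, then a byte.  */
  if (len >= 4)
    {
      __builtin_memcpy (&w0, src, 4);
      __builtin_memcpy (dst, &w0, 4);
      src += 4;
      dst += 4;
      len -= 4;
    }
  if (len >= 2)
    {
      __builtin_memcpy (dst, src, 2);
      src += 2;
      dst += 2;
      len -= 2;
    }
  if (len & 1)
    *dst = *src;
}
#endif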
11695 /* From mips_adjust_block_mem:
11697 Helper function for doing a loop-based block operation on memory
11698 reference MEM. Each iteration of the loop will operate on LENGTH
11699 bytes of MEM.
11701 Create a new base register for use within the loop and point it to
11702 the start of MEM. Create a new memory reference that uses this
11703 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11705 static void
11706 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11707 rtx *loop_mem)
11709 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11711 /* Although the new mem does not refer to a known location,
11712 it does keep up to LENGTH bytes of alignment. */
11713 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11714 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11717 /* From mips_block_move_loop:
11719 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11720 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11721 the memory regions do not overlap. */
11723 static void
11724 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11725 unsigned int interleave_factor,
11726 HOST_WIDE_INT bytes_per_iter)
11728 rtx label, src_reg, dest_reg, final_src, test;
11729 HOST_WIDE_INT leftover;
11731 leftover = length % bytes_per_iter;
11732 length -= leftover;
11734 /* Create registers and memory references for use within the loop. */
11735 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11736 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11738 /* Calculate the value that SRC_REG should have after the last iteration of
11739 the loop. */
11740 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11741 0, 0, OPTAB_WIDEN);
11743 /* Emit the start of the loop. */
11744 label = gen_label_rtx ();
11745 emit_label (label);
11747 /* Emit the loop body. */
11748 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11749 interleave_factor);
11751 /* Move on to the next block. */
11752 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11753 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11755 /* Emit the loop condition. */
11756 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11757 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11759 /* Mop up any left-over bytes. */
11760 if (leftover)
11761 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11764 /* Emit a block move when either the source or destination is unaligned (not
11765 aligned to a four-byte boundary). This may need further tuning depending on
11766 core type, optimize_size setting, etc. */
11768 static int
11769 arm_movmemqi_unaligned (rtx *operands)
11771 HOST_WIDE_INT length = INTVAL (operands[2]);
11773 if (optimize_size)
11775 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11776 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11777 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11778 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11779 or dst_aligned though: allow more interleaving in those cases since the
11780 resulting code can be smaller. */
11781 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11782 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11784 if (length > 12)
11785 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11786 interleave_factor, bytes_per_iter);
11787 else
11788 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11789 interleave_factor);
11791 else
11793 /* Note that the loop created by arm_block_move_unaligned_loop may be
11794 subject to loop unrolling, which makes tuning this condition a little
11795 redundant. */
11796 if (length > 32)
11797 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11798 else
11799 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11802 return 1;
11805 int
11806 arm_gen_movmemqi (rtx *operands)
11808 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11809 HOST_WIDE_INT srcoffset, dstoffset;
11810 int i;
11811 rtx src, dst, srcbase, dstbase;
11812 rtx part_bytes_reg = NULL;
11813 rtx mem;
11815 if (!CONST_INT_P (operands[2])
11816 || !CONST_INT_P (operands[3])
11817 || INTVAL (operands[2]) > 64)
11818 return 0;
11820 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11821 return arm_movmemqi_unaligned (operands);
11823 if (INTVAL (operands[3]) & 3)
11824 return 0;
11826 dstbase = operands[0];
11827 srcbase = operands[1];
11829 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11830 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11832 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11833 out_words_to_go = INTVAL (operands[2]) / 4;
11834 last_bytes = INTVAL (operands[2]) & 3;
11835 dstoffset = srcoffset = 0;
11837 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11838 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11840 for (i = 0; in_words_to_go >= 2; i+=4)
11842 if (in_words_to_go > 4)
11843 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11844 TRUE, srcbase, &srcoffset));
11845 else
11846 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11847 src, FALSE, srcbase,
11848 &srcoffset));
11850 if (out_words_to_go)
11852 if (out_words_to_go > 4)
11853 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11854 TRUE, dstbase, &dstoffset));
11855 else if (out_words_to_go != 1)
11856 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11857 out_words_to_go, dst,
11858 (last_bytes == 0
11859 ? FALSE : TRUE),
11860 dstbase, &dstoffset));
11861 else
11863 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11864 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11865 if (last_bytes != 0)
11867 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11868 dstoffset += 4;
11873 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11874 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11877 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11878 if (out_words_to_go)
11880 rtx sreg;
11882 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11883 sreg = copy_to_reg (mem);
11885 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11886 emit_move_insn (mem, sreg);
11887 in_words_to_go--;
11889 gcc_assert (!in_words_to_go); /* Sanity check */
11892 if (in_words_to_go)
11894 gcc_assert (in_words_to_go > 0);
11896 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11897 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11900 gcc_assert (!last_bytes || part_bytes_reg);
11902 if (BYTES_BIG_ENDIAN && last_bytes)
11904 rtx tmp = gen_reg_rtx (SImode);
11906 /* The bytes we want are in the top end of the word. */
11907 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11908 GEN_INT (8 * (4 - last_bytes))));
11909 part_bytes_reg = tmp;
11911 while (last_bytes)
11913 mem = adjust_automodify_address (dstbase, QImode,
11914 plus_constant (Pmode, dst,
11915 last_bytes - 1),
11916 dstoffset + last_bytes - 1);
11917 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11919 if (--last_bytes)
11921 tmp = gen_reg_rtx (SImode);
11922 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11923 part_bytes_reg = tmp;
11928 else
11930 if (last_bytes > 1)
11932 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11933 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11934 last_bytes -= 2;
11935 if (last_bytes)
11937 rtx tmp = gen_reg_rtx (SImode);
11938 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11939 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11940 part_bytes_reg = tmp;
11941 dstoffset += 2;
11945 if (last_bytes)
11947 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11948 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11952 return 1;
11955 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
11956 by mode size. */
11957 inline static rtx
11958 next_consecutive_mem (rtx mem)
11960 enum machine_mode mode = GET_MODE (mem);
11961 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
11962 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
11964 return adjust_automodify_address (mem, mode, addr, offset);
11967 /* Copy using LDRD/STRD instructions whenever possible.
11968 Returns true upon success. */
11969 bool
11970 gen_movmem_ldrd_strd (rtx *operands)
11972 unsigned HOST_WIDE_INT len;
11973 HOST_WIDE_INT align;
11974 rtx src, dst, base;
11975 rtx reg0;
11976 bool src_aligned, dst_aligned;
11977 bool src_volatile, dst_volatile;
11979 gcc_assert (CONST_INT_P (operands[2]));
11980 gcc_assert (CONST_INT_P (operands[3]));
11982 len = UINTVAL (operands[2]);
11983 if (len > 64)
11984 return false;
11986 /* Maximum alignment we can assume for both src and dst buffers. */
11987 align = INTVAL (operands[3]);
11989 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
11990 return false;
11992 /* Place src and dst addresses in registers
11993 and update the corresponding mem rtx. */
11994 dst = operands[0];
11995 dst_volatile = MEM_VOLATILE_P (dst);
11996 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
11997 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
11998 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
12000 src = operands[1];
12001 src_volatile = MEM_VOLATILE_P (src);
12002 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
12003 base = copy_to_mode_reg (SImode, XEXP (src, 0));
12004 src = adjust_automodify_address (src, VOIDmode, base, 0);
12006 if (!unaligned_access && !(src_aligned && dst_aligned))
12007 return false;
12009 if (src_volatile || dst_volatile)
12010 return false;
12012 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
12013 if (!(dst_aligned || src_aligned))
12014 return arm_gen_movmemqi (operands);
12016 src = adjust_address (src, DImode, 0);
12017 dst = adjust_address (dst, DImode, 0);
12018 while (len >= 8)
12020 len -= 8;
12021 reg0 = gen_reg_rtx (DImode);
12022 if (src_aligned)
12023 emit_move_insn (reg0, src);
12024 else
12025 emit_insn (gen_unaligned_loaddi (reg0, src));
12027 if (dst_aligned)
12028 emit_move_insn (dst, reg0);
12029 else
12030 emit_insn (gen_unaligned_storedi (dst, reg0));
12032 src = next_consecutive_mem (src);
12033 dst = next_consecutive_mem (dst);
12036 gcc_assert (len < 8);
12037 if (len >= 4)
12039 /* More than a word but less than a double-word to copy. Copy a word. */
12040 reg0 = gen_reg_rtx (SImode);
12041 src = adjust_address (src, SImode, 0);
12042 dst = adjust_address (dst, SImode, 0);
12043 if (src_aligned)
12044 emit_move_insn (reg0, src);
12045 else
12046 emit_insn (gen_unaligned_loadsi (reg0, src));
12048 if (dst_aligned)
12049 emit_move_insn (dst, reg0);
12050 else
12051 emit_insn (gen_unaligned_storesi (dst, reg0));
12053 src = next_consecutive_mem (src);
12054 dst = next_consecutive_mem (dst);
12055 len -= 4;
12058 if (len == 0)
12059 return true;
12061 /* Copy the remaining bytes. */
12062 if (len >= 2)
12064 dst = adjust_address (dst, HImode, 0);
12065 src = adjust_address (src, HImode, 0);
12066 reg0 = gen_reg_rtx (SImode);
12067 if (src_aligned)
12068 emit_insn (gen_zero_extendhisi2 (reg0, src));
12069 else
12070 emit_insn (gen_unaligned_loadhiu (reg0, src));
12072 if (dst_aligned)
12073 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
12074 else
12075 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
12077 src = next_consecutive_mem (src);
12078 dst = next_consecutive_mem (dst);
12079 if (len == 2)
12080 return true;
12083 dst = adjust_address (dst, QImode, 0);
12084 src = adjust_address (src, QImode, 0);
12085 reg0 = gen_reg_rtx (QImode);
12086 emit_move_insn (reg0, src);
12087 emit_move_insn (dst, reg0);
12088 return true;
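/* Illustrative sketch only -- not part of the compiler, guarded out, name made
   up.  It shows the size breakdown used by gen_movmem_ldrd_strd above as plain
   C: doublewords first (these become LDRD/STRD or LDM/STM), then at most one
   word, one halfword and one byte.  */
#if 0
static void
copy_8_4_2_1 (unsigned char *dst, const unsigned char *src, unsigned long len)
{
  for (; len >= 8; src += 8, dst += 8, len -= 8)
    __builtin_memcpy (dst, src, 8);   /* doubleword chunk */
  if (len >= 4)
    {
      __builtin_memcpy (dst, src, 4); /* one word */
      src += 4;
      dst += 4;
      len -= 4;
    }
  if (len >= 2)
    {
      __builtin_memcpy (dst, src, 2); /* one halfword */
      src += 2;
      dst += 2;
      len -= 2;
    }
  if (len & 1)
    *dst = *src;                      /* one byte */
}
#endif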
12091 /* Select a dominance comparison mode if possible for a test of the general
12092 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
12093 COND_OR == DOM_CC_X_AND_Y => (X && Y)
12094 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
12095 COND_OR == DOM_CC_X_OR_Y => (X || Y)
12096 In all cases OP will be either EQ or NE, but we don't need to know which
12097 here. If we are unable to support a dominance comparison we return
12098 CC mode. This will then fail to match for the RTL expressions that
12099 generate this call. */
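/* For instance (illustrative): a test of the form (a == b) || (c <= d) reaches
   this function with COND_OR == DOM_CC_X_OR_Y, cond1 == EQ and cond2 == LE,
   and the switch below maps that pair to CC_DLEmode.  */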
12100 enum machine_mode
12101 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
12103 enum rtx_code cond1, cond2;
12104 int swapped = 0;
12106 /* Currently we will probably get the wrong result if the individual
12107 comparisons are not simple. This also ensures that it is safe to
12108 reverse a comparison if necessary. */
12109 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
12110 != CCmode)
12111 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
12112 != CCmode))
12113 return CCmode;
12115 /* The if_then_else variant of this tests the second condition if the
12116 first passes, but is true if the first fails. Reverse the first
12117 condition to get a true "inclusive-or" expression. */
12118 if (cond_or == DOM_CC_NX_OR_Y)
12119 cond1 = reverse_condition (cond1);
12121 /* If the comparisons are not equal, and one doesn't dominate the other,
12122 then we can't do this. */
12123 if (cond1 != cond2
12124 && !comparison_dominates_p (cond1, cond2)
12125 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
12126 return CCmode;
12128 if (swapped)
12130 enum rtx_code temp = cond1;
12131 cond1 = cond2;
12132 cond2 = temp;
12135 switch (cond1)
12137 case EQ:
12138 if (cond_or == DOM_CC_X_AND_Y)
12139 return CC_DEQmode;
12141 switch (cond2)
12143 case EQ: return CC_DEQmode;
12144 case LE: return CC_DLEmode;
12145 case LEU: return CC_DLEUmode;
12146 case GE: return CC_DGEmode;
12147 case GEU: return CC_DGEUmode;
12148 default: gcc_unreachable ();
12151 case LT:
12152 if (cond_or == DOM_CC_X_AND_Y)
12153 return CC_DLTmode;
12155 switch (cond2)
12157 case LT:
12158 return CC_DLTmode;
12159 case LE:
12160 return CC_DLEmode;
12161 case NE:
12162 return CC_DNEmode;
12163 default:
12164 gcc_unreachable ();
12167 case GT:
12168 if (cond_or == DOM_CC_X_AND_Y)
12169 return CC_DGTmode;
12171 switch (cond2)
12173 case GT:
12174 return CC_DGTmode;
12175 case GE:
12176 return CC_DGEmode;
12177 case NE:
12178 return CC_DNEmode;
12179 default:
12180 gcc_unreachable ();
12183 case LTU:
12184 if (cond_or == DOM_CC_X_AND_Y)
12185 return CC_DLTUmode;
12187 switch (cond2)
12189 case LTU:
12190 return CC_DLTUmode;
12191 case LEU:
12192 return CC_DLEUmode;
12193 case NE:
12194 return CC_DNEmode;
12195 default:
12196 gcc_unreachable ();
12199 case GTU:
12200 if (cond_or == DOM_CC_X_AND_Y)
12201 return CC_DGTUmode;
12203 switch (cond2)
12205 case GTU:
12206 return CC_DGTUmode;
12207 case GEU:
12208 return CC_DGEUmode;
12209 case NE:
12210 return CC_DNEmode;
12211 default:
12212 gcc_unreachable ();
12215 /* The remaining cases only occur when both comparisons are the
12216 same. */
12217 case NE:
12218 gcc_assert (cond1 == cond2);
12219 return CC_DNEmode;
12221 case LE:
12222 gcc_assert (cond1 == cond2);
12223 return CC_DLEmode;
12225 case GE:
12226 gcc_assert (cond1 == cond2);
12227 return CC_DGEmode;
12229 case LEU:
12230 gcc_assert (cond1 == cond2);
12231 return CC_DLEUmode;
12233 case GEU:
12234 gcc_assert (cond1 == cond2);
12235 return CC_DGEUmode;
12237 default:
12238 gcc_unreachable ();
12242 enum machine_mode
12243 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
12245 /* All floating point compares return CCFP if it is an equality
12246 comparison, and CCFPE otherwise. */
12247 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12249 switch (op)
12251 case EQ:
12252 case NE:
12253 case UNORDERED:
12254 case ORDERED:
12255 case UNLT:
12256 case UNLE:
12257 case UNGT:
12258 case UNGE:
12259 case UNEQ:
12260 case LTGT:
12261 return CCFPmode;
12263 case LT:
12264 case LE:
12265 case GT:
12266 case GE:
12267 return CCFPEmode;
12269 default:
12270 gcc_unreachable ();
12274 /* A compare with a shifted operand. Because of canonicalization, the
12275 comparison will have to be swapped when we emit the assembler. */
12276 if (GET_MODE (y) == SImode
12277 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12278 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12279 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12280 || GET_CODE (x) == ROTATERT))
12281 return CC_SWPmode;
12283 /* This operation is performed swapped, but since we only rely on the Z
12284 flag we don't need an additional mode. */
12285 if (GET_MODE (y) == SImode
12286 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12287 && GET_CODE (x) == NEG
12288 && (op == EQ || op == NE))
12289 return CC_Zmode;
12291 /* This is a special case that is used by combine to allow a
12292 comparison of a shifted byte load to be split into a zero-extend
12293 followed by a comparison of the shifted integer (only valid for
12294 equalities and unsigned inequalities). */
12295 if (GET_MODE (x) == SImode
12296 && GET_CODE (x) == ASHIFT
12297 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12298 && GET_CODE (XEXP (x, 0)) == SUBREG
12299 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12300 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12301 && (op == EQ || op == NE
12302 || op == GEU || op == GTU || op == LTU || op == LEU)
12303 && CONST_INT_P (y))
12304 return CC_Zmode;
12306 /* A construct for a conditional compare, if the false arm contains
12307 0, then both conditions must be true, otherwise either condition
12308 must be true. Not all conditions are possible, so CCmode is
12309 returned if it can't be done. */
12310 if (GET_CODE (x) == IF_THEN_ELSE
12311 && (XEXP (x, 2) == const0_rtx
12312 || XEXP (x, 2) == const1_rtx)
12313 && COMPARISON_P (XEXP (x, 0))
12314 && COMPARISON_P (XEXP (x, 1)))
12315 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12316 INTVAL (XEXP (x, 2)));
12318 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12319 if (GET_CODE (x) == AND
12320 && (op == EQ || op == NE)
12321 && COMPARISON_P (XEXP (x, 0))
12322 && COMPARISON_P (XEXP (x, 1)))
12323 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12324 DOM_CC_X_AND_Y);
12326 if (GET_CODE (x) == IOR
12327 && (op == EQ || op == NE)
12328 && COMPARISON_P (XEXP (x, 0))
12329 && COMPARISON_P (XEXP (x, 1)))
12330 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12331 DOM_CC_X_OR_Y);
12333 /* An operation (on Thumb) where we want to test for a single bit.
12334 This is done by shifting that bit up into the top bit of a
12335 scratch register; we can then branch on the sign bit. */
12336 if (TARGET_THUMB1
12337 && GET_MODE (x) == SImode
12338 && (op == EQ || op == NE)
12339 && GET_CODE (x) == ZERO_EXTRACT
12340 && XEXP (x, 1) == const1_rtx)
12341 return CC_Nmode;
12343 /* An operation that sets the condition codes as a side-effect, the
12344 V flag is not set correctly, so we can only use comparisons where
12345 this doesn't matter. (For LT and GE we can use "mi" and "pl"
12346 instead.) */
12347 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12348 if (GET_MODE (x) == SImode
12349 && y == const0_rtx
12350 && (op == EQ || op == NE || op == LT || op == GE)
12351 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12352 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12353 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12354 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12355 || GET_CODE (x) == LSHIFTRT
12356 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12357 || GET_CODE (x) == ROTATERT
12358 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12359 return CC_NOOVmode;
12361 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12362 return CC_Zmode;
12364 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12365 && GET_CODE (x) == PLUS
12366 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12367 return CC_Cmode;
12369 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12371 switch (op)
12373 case EQ:
12374 case NE:
12375 /* A DImode comparison against zero can be implemented by
12376 or'ing the two halves together. */
12377 if (y == const0_rtx)
12378 return CC_Zmode;
12380 /* We can do an equality test in three Thumb instructions. */
12381 if (!TARGET_32BIT)
12382 return CC_Zmode;
12384 /* FALLTHROUGH */
12386 case LTU:
12387 case LEU:
12388 case GTU:
12389 case GEU:
12390 /* DImode unsigned comparisons can be implemented by cmp +
12391 cmpeq without a scratch register. Not worth doing in
12392 Thumb-2. */
12393 if (TARGET_32BIT)
12394 return CC_CZmode;
12396 /* FALLTHROUGH */
12398 case LT:
12399 case LE:
12400 case GT:
12401 case GE:
12402 /* DImode signed and unsigned comparisons can be implemented
12403 by cmp + sbcs with a scratch register, but that does not
12404 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12405 gcc_assert (op != EQ && op != NE);
12406 return CC_NCVmode;
12408 default:
12409 gcc_unreachable ();
12413 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12414 return GET_MODE (x);
12416 return CCmode;
12419 /* X and Y are two things to compare using CODE. Emit the compare insn and
12420 return the rtx for the CC register in the proper mode. SCRATCH, if
12421 nonnull, is a scratch register that may be needed for a DImode comparison. */
12422 rtx
12423 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12425 enum machine_mode mode;
12426 rtx cc_reg;
12427 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12429 /* We might have X as a constant, Y as a register because of the predicates
12430 used for cmpdi. If so, force X to a register here. */
12431 if (dimode_comparison && !REG_P (x))
12432 x = force_reg (DImode, x);
12434 mode = SELECT_CC_MODE (code, x, y);
12435 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12437 if (dimode_comparison
12438 && mode != CC_CZmode)
12440 rtx clobber, set;
12442 /* To compare two non-zero values for equality, XOR them and
12443 then compare against zero. Not used for ARM mode; there
12444 CC_CZmode is cheaper. */
12445 if (mode == CC_Zmode && y != const0_rtx)
12447 gcc_assert (!reload_completed);
12448 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12449 y = const0_rtx;
12452 /* A scratch register is required. */
12453 if (reload_completed)
12454 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12455 else
12456 scratch = gen_rtx_SCRATCH (SImode);
12458 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12459 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12460 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12462 else
12463 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
12465 return cc_reg;
12468 /* Generate a sequence of insns that will generate the correct return
12469 address mask depending on the physical architecture that the program
12470 is running on. */
12471 rtx
12472 arm_gen_return_addr_mask (void)
12474 rtx reg = gen_reg_rtx (Pmode);
12476 emit_insn (gen_return_addr_mask (reg));
12477 return reg;
12480 void
12481 arm_reload_in_hi (rtx *operands)
12483 rtx ref = operands[1];
12484 rtx base, scratch;
12485 HOST_WIDE_INT offset = 0;
12487 if (GET_CODE (ref) == SUBREG)
12489 offset = SUBREG_BYTE (ref);
12490 ref = SUBREG_REG (ref);
12493 if (REG_P (ref))
12495 /* We have a pseudo which has been spilt onto the stack; there
12496 are two cases here: the first where there is a simple
12497 stack-slot replacement and a second where the stack-slot is
12498 out of range, or is used as a subreg. */
12499 if (reg_equiv_mem (REGNO (ref)))
12501 ref = reg_equiv_mem (REGNO (ref));
12502 base = find_replacement (&XEXP (ref, 0));
12504 else
12505 /* The slot is out of range, or was dressed up in a SUBREG. */
12506 base = reg_equiv_address (REGNO (ref));
12508 else
12509 base = find_replacement (&XEXP (ref, 0));
12511 /* Handle the case where the address is too complex to be offset by 1. */
12512 if (GET_CODE (base) == MINUS
12513 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12515 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12517 emit_set_insn (base_plus, base);
12518 base = base_plus;
12520 else if (GET_CODE (base) == PLUS)
12522 /* The addend must be CONST_INT, or we would have dealt with it above. */
12523 HOST_WIDE_INT hi, lo;
12525 offset += INTVAL (XEXP (base, 1));
12526 base = XEXP (base, 0);
12528 /* Rework the address into a legal sequence of insns. */
12529 /* Valid range for lo is -4095 -> 4095 */
12530 lo = (offset >= 0
12531 ? (offset & 0xfff)
12532 : -((-offset) & 0xfff));
12534 /* Corner case, if lo is the max offset then we would be out of range
12535 once we have added the additional 1 below, so bump the msb into the
12536 pre-loading insn(s). */
12537 if (lo == 4095)
12538 lo &= 0x7ff;
12540 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12541 ^ (HOST_WIDE_INT) 0x80000000)
12542 - (HOST_WIDE_INT) 0x80000000);
12544 gcc_assert (hi + lo == offset);
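/* Worked example (illustrative): offset 0x1234 splits into hi = 0x1000 and
   lo = 0x234; offset -0x1234 splits into hi = -0x1000 and lo = -0x234. For
   offset 0x1fff, lo would be 4095, so it is clamped to 0x7ff (keeping the
   '+ 1' access below in range) and hi becomes 0x1800. The XOR/subtract pair
   simply sign-extends bit 31 of (offset - lo).  */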
12546 if (hi != 0)
12548 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12550 /* Get the base address; addsi3 knows how to handle constants
12551 that require more than one insn. */
12552 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12553 base = base_plus;
12554 offset = lo;
12558 /* Operands[2] may overlap operands[0] (though it won't overlap
12559 operands[1]), that's why we asked for a DImode reg -- so we can
12560 use the bit that does not overlap. */
12561 if (REGNO (operands[2]) == REGNO (operands[0]))
12562 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12563 else
12564 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12566 emit_insn (gen_zero_extendqisi2 (scratch,
12567 gen_rtx_MEM (QImode,
12568 plus_constant (Pmode, base,
12569 offset))));
12570 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12571 gen_rtx_MEM (QImode,
12572 plus_constant (Pmode, base,
12573 offset + 1))));
12574 if (!BYTES_BIG_ENDIAN)
12575 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12576 gen_rtx_IOR (SImode,
12577 gen_rtx_ASHIFT
12578 (SImode,
12579 gen_rtx_SUBREG (SImode, operands[0], 0),
12580 GEN_INT (8)),
12581 scratch));
12582 else
12583 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12584 gen_rtx_IOR (SImode,
12585 gen_rtx_ASHIFT (SImode, scratch,
12586 GEN_INT (8)),
12587 gen_rtx_SUBREG (SImode, operands[0], 0)));
12590 /* Handle storing a half-word to memory during reload by synthesizing as two
12591 byte stores. Take care not to clobber the input values until after we
12592 have moved them somewhere safe. This code assumes that if the DImode
12593 scratch in operands[2] overlaps either the input value or output address
12594 in some way, then that value must die in this insn (we absolutely need
12595 two scratch registers for some corner cases). */
12596 void
12597 arm_reload_out_hi (rtx *operands)
12599 rtx ref = operands[0];
12600 rtx outval = operands[1];
12601 rtx base, scratch;
12602 HOST_WIDE_INT offset = 0;
12604 if (GET_CODE (ref) == SUBREG)
12606 offset = SUBREG_BYTE (ref);
12607 ref = SUBREG_REG (ref);
12610 if (REG_P (ref))
12612 /* We have a pseudo which has been spilt onto the stack; there
12613 are two cases here: the first where there is a simple
12614 stack-slot replacement and a second where the stack-slot is
12615 out of range, or is used as a subreg. */
12616 if (reg_equiv_mem (REGNO (ref)))
12618 ref = reg_equiv_mem (REGNO (ref));
12619 base = find_replacement (&XEXP (ref, 0));
12621 else
12622 /* The slot is out of range, or was dressed up in a SUBREG. */
12623 base = reg_equiv_address (REGNO (ref));
12625 else
12626 base = find_replacement (&XEXP (ref, 0));
12628 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12630 /* Handle the case where the address is too complex to be offset by 1. */
12631 if (GET_CODE (base) == MINUS
12632 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12634 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12636 /* Be careful not to destroy OUTVAL. */
12637 if (reg_overlap_mentioned_p (base_plus, outval))
12639 /* Updating base_plus might destroy outval, see if we can
12640 swap the scratch and base_plus. */
12641 if (!reg_overlap_mentioned_p (scratch, outval))
12643 rtx tmp = scratch;
12644 scratch = base_plus;
12645 base_plus = tmp;
12647 else
12649 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12651 /* Be conservative and copy OUTVAL into the scratch now,
12652 this should only be necessary if outval is a subreg
12653 of something larger than a word. */
12654 /* XXX Might this clobber base? I can't see how it can,
12655 since scratch is known to overlap with OUTVAL, and
12656 must be wider than a word. */
12657 emit_insn (gen_movhi (scratch_hi, outval));
12658 outval = scratch_hi;
12662 emit_set_insn (base_plus, base);
12663 base = base_plus;
12665 else if (GET_CODE (base) == PLUS)
12667 /* The addend must be CONST_INT, or we would have dealt with it above. */
12668 HOST_WIDE_INT hi, lo;
12670 offset += INTVAL (XEXP (base, 1));
12671 base = XEXP (base, 0);
12673 /* Rework the address into a legal sequence of insns. */
12674 /* Valid range for lo is -4095 -> 4095 */
12675 lo = (offset >= 0
12676 ? (offset & 0xfff)
12677 : -((-offset) & 0xfff));
12679 /* Corner case, if lo is the max offset then we would be out of range
12680 once we have added the additional 1 below, so bump the msb into the
12681 pre-loading insn(s). */
12682 if (lo == 4095)
12683 lo &= 0x7ff;
12685 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12686 ^ (HOST_WIDE_INT) 0x80000000)
12687 - (HOST_WIDE_INT) 0x80000000);
12689 gcc_assert (hi + lo == offset);
12691 if (hi != 0)
12693 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12695 /* Be careful not to destroy OUTVAL. */
12696 if (reg_overlap_mentioned_p (base_plus, outval))
12698 /* Updating base_plus might destroy outval, see if we
12699 can swap the scratch and base_plus. */
12700 if (!reg_overlap_mentioned_p (scratch, outval))
12702 rtx tmp = scratch;
12703 scratch = base_plus;
12704 base_plus = tmp;
12706 else
12708 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12710 /* Be conservative and copy outval into scratch now,
12711 this should only be necessary if outval is a
12712 subreg of something larger than a word. */
12713 /* XXX Might this clobber base? I can't see how it
12714 can, since scratch is known to overlap with
12715 outval. */
12716 emit_insn (gen_movhi (scratch_hi, outval));
12717 outval = scratch_hi;
12721 /* Get the base address; addsi3 knows how to handle constants
12722 that require more than one insn. */
12723 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12724 base = base_plus;
12725 offset = lo;
12729 if (BYTES_BIG_ENDIAN)
12731 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12732 plus_constant (Pmode, base,
12733 offset + 1)),
12734 gen_lowpart (QImode, outval)));
12735 emit_insn (gen_lshrsi3 (scratch,
12736 gen_rtx_SUBREG (SImode, outval, 0),
12737 GEN_INT (8)));
12738 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12739 offset)),
12740 gen_lowpart (QImode, scratch)));
12742 else
12744 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12745 offset)),
12746 gen_lowpart (QImode, outval)));
12747 emit_insn (gen_lshrsi3 (scratch,
12748 gen_rtx_SUBREG (SImode, outval, 0),
12749 GEN_INT (8)));
12750 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12751 plus_constant (Pmode, base,
12752 offset + 1)),
12753 gen_lowpart (QImode, scratch)));
12757 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12758 (padded to the size of a word) should be passed in a register. */
12760 static bool
12761 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12763 if (TARGET_AAPCS_BASED)
12764 return must_pass_in_stack_var_size (mode, type);
12765 else
12766 return must_pass_in_stack_var_size_or_pad (mode, type);
12770 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12771 Return true if an argument passed on the stack should be padded upwards,
12772 i.e. if the least-significant byte has useful data.
12773 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12774 aggregate types are placed in the lowest memory address. */
12776 bool
12777 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12779 if (!TARGET_AAPCS_BASED)
12780 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12782 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12783 return false;
12785 return true;
12789 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12790 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12791 register has useful data, and return the opposite if the most
12792 significant byte does. */
12794 bool
12795 arm_pad_reg_upward (enum machine_mode mode,
12796 tree type, int first ATTRIBUTE_UNUSED)
12798 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12800 /* For AAPCS, small aggregates, small fixed-point types,
12801 and small complex types are always padded upwards. */
12802 if (type)
12804 if ((AGGREGATE_TYPE_P (type)
12805 || TREE_CODE (type) == COMPLEX_TYPE
12806 || FIXED_POINT_TYPE_P (type))
12807 && int_size_in_bytes (type) <= 4)
12808 return true;
12810 else
12812 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12813 && GET_MODE_SIZE (mode) <= 4)
12814 return true;
12818 /* Otherwise, use default padding. */
12819 return !BYTES_BIG_ENDIAN;
12822 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12823 assuming that the address in the base register is word aligned. */
12824 bool
12825 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12827 HOST_WIDE_INT max_offset;
12829 /* Offset must be a multiple of 4 in Thumb mode. */
12830 if (TARGET_THUMB2 && ((offset & 3) != 0))
12831 return false;
12833 if (TARGET_THUMB2)
12834 max_offset = 1020;
12835 else if (TARGET_ARM)
12836 max_offset = 255;
12837 else
12838 return false;
12840 return ((offset <= max_offset) && (offset >= -max_offset));
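/* That is (illustrative): multiples of 4 in [-1020, 1020] for Thumb-2, any
   offset in [-255, 255] for ARM, and never valid for Thumb-1.  */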
12843 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12844 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12845 Assumes that the address in the base register RN is word aligned. The pattern
12846 guarantees that both memory accesses use the same base register, that the
12847 offsets are constants within range, and that the gap between the offsets is 4.
12848 If reload is complete, check that the registers are legal. WBACK indicates whether
12849 the address is updated. LOAD indicates whether the memory access is a load or a store. */
12850 bool
12851 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12852 bool wback, bool load)
12854 unsigned int t, t2, n;
12856 if (!reload_completed)
12857 return true;
12859 if (!offset_ok_for_ldrd_strd (offset))
12860 return false;
12862 t = REGNO (rt);
12863 t2 = REGNO (rt2);
12864 n = REGNO (rn);
12866 if ((TARGET_THUMB2)
12867 && ((wback && (n == t || n == t2))
12868 || (t == SP_REGNUM)
12869 || (t == PC_REGNUM)
12870 || (t2 == SP_REGNUM)
12871 || (t2 == PC_REGNUM)
12872 || (!load && (n == PC_REGNUM))
12873 || (load && (t == t2))
12874 /* Triggers Cortex-M3 LDRD errata. */
12875 || (!wback && load && fix_cm3_ldrd && (n == t))))
12876 return false;
12878 if ((TARGET_ARM)
12879 && ((wback && (n == t || n == t2))
12880 || (t2 == PC_REGNUM)
12881 || (t % 2 != 0) /* First destination register is not even. */
12882 || (t2 != t + 1)
12883 /* PC can be used as a base register (for offset addressing only),
12884 but it is deprecated. */
12885 || (n == PC_REGNUM)))
12886 return false;
12888 return true;
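/* For instance (illustrative): in ARM state 'ldrd r0, r1, [r2]' passes the
   checks above (even first register, consecutive pair, base distinct from PC),
   while 'ldrd r1, r2, [r3]' is rejected because the first register is odd.  */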
12891 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
12892 operand MEM's address contains an immediate offset from the base
12893 register and has no side effects, in which case it sets BASE and
12894 OFFSET accordingly. */
12895 static bool
12896 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
12898 rtx addr;
12900 gcc_assert (base != NULL && offset != NULL);
12902 /* TODO: Handle more general memory operand patterns, such as
12903 PRE_DEC and PRE_INC. */
12905 if (side_effects_p (mem))
12906 return false;
12908 /* Can't deal with subregs. */
12909 if (GET_CODE (mem) == SUBREG)
12910 return false;
12912 gcc_assert (MEM_P (mem));
12914 *offset = const0_rtx;
12916 addr = XEXP (mem, 0);
12918 /* If addr isn't valid for DImode, then we can't handle it. */
12919 if (!arm_legitimate_address_p (DImode, addr,
12920 reload_in_progress || reload_completed))
12921 return false;
12923 if (REG_P (addr))
12925 *base = addr;
12926 return true;
12928 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
12930 *base = XEXP (addr, 0);
12931 *offset = XEXP (addr, 1);
12932 return (REG_P (*base) && CONST_INT_P (*offset));
12935 return false;
12938 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
12940 /* Called from a peephole2 to replace two word-size accesses with a
12941 single LDRD/STRD instruction. Returns true iff we can generate a
12942 new instruction sequence. That is, both accesses use the same base
12943 register and the gap between constant offsets is 4. This function
12944 may reorder its operands to match ldrd/strd RTL templates.
12945 OPERANDS are the operands found by the peephole matcher;
12946 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
12947 corresponding memory operands. LOAD indicates whether the access
12948 is a load or a store. CONST_STORE indicates a store of constant
12949 integer values held in OPERANDS[4,5] and assumes that the pattern
12950 is four insns long, for the purpose of checking dead registers.
12951 COMMUTE indicates that register operands may be reordered. */
12952 bool
12953 gen_operands_ldrd_strd (rtx *operands, bool load,
12954 bool const_store, bool commute)
12956 int nops = 2;
12957 HOST_WIDE_INT offsets[2], offset;
12958 rtx base = NULL_RTX;
12959 rtx cur_base, cur_offset, tmp;
12960 int i, gap;
12961 HARD_REG_SET regset;
12963 gcc_assert (!const_store || !load);
12964 /* Check that the memory references are immediate offsets from the
12965 same base register. Extract the base register, the destination
12966 registers, and the corresponding memory offsets. */
12967 for (i = 0; i < nops; i++)
12969 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
12970 return false;
12972 if (i == 0)
12973 base = cur_base;
12974 else if (REGNO (base) != REGNO (cur_base))
12975 return false;
12977 offsets[i] = INTVAL (cur_offset);
12978 if (GET_CODE (operands[i]) == SUBREG)
12980 tmp = SUBREG_REG (operands[i]);
12981 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
12982 operands[i] = tmp;
12986 /* Make sure there is no dependency between the individual loads. */
12987 if (load && REGNO (operands[0]) == REGNO (base))
12988 return false; /* RAW */
12990 if (load && REGNO (operands[0]) == REGNO (operands[1]))
12991 return false; /* WAW */
12993 /* If the same input register is used in both stores
12994 when storing different constants, try to find a free register.
12995 For example, the code
12996 mov r0, 0
12997 str r0, [r2]
12998 mov r0, 1
12999 str r0, [r2, #4]
13000 can be transformed into
13001 mov r1, 0
13002 strd r1, r0, [r2]
13003 in Thumb mode assuming that r1 is free. */
13004 if (const_store
13005 && REGNO (operands[0]) == REGNO (operands[1])
13006 && INTVAL (operands[4]) != INTVAL (operands[5]))
13008 if (TARGET_THUMB2)
13010 CLEAR_HARD_REG_SET (regset);
13011 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
13012 if (tmp == NULL_RTX)
13013 return false;
13015 /* Use the new register in the first load to ensure that
13016 if the original input register is not dead after peephole,
13017 then it will have the correct constant value. */
13018 operands[0] = tmp;
13020 else if (TARGET_ARM)
13022 return false;
13023 int regno = REGNO (operands[0]);
13024 if (!peep2_reg_dead_p (4, operands[0]))
13026 /* When the input register is even and is not dead after the
13027 pattern, it has to hold the second constant but we cannot
13028 form a legal STRD in ARM mode with this register as the second
13029 register. */
13030 if (regno % 2 == 0)
13031 return false;
13033 /* Is regno-1 free? */
13034 SET_HARD_REG_SET (regset);
13035 CLEAR_HARD_REG_BIT(regset, regno - 1);
13036 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
13037 if (tmp == NULL_RTX)
13038 return false;
13040 operands[0] = tmp;
13042 else
13044 /* Find a DImode register. */
13045 CLEAR_HARD_REG_SET (regset);
13046 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13047 if (tmp != NULL_RTX)
13049 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13050 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13052 else
13054 /* Can we use the input register to form a DI register? */
13055 SET_HARD_REG_SET (regset);
13056 CLEAR_HARD_REG_BIT(regset,
13057 regno % 2 == 0 ? regno + 1 : regno - 1);
13058 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
13059 if (tmp == NULL_RTX)
13060 return false;
13061 operands[regno % 2 == 1 ? 0 : 1] = tmp;
13065 gcc_assert (operands[0] != NULL_RTX);
13066 gcc_assert (operands[1] != NULL_RTX);
13067 gcc_assert (REGNO (operands[0]) % 2 == 0);
13068 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
13072 /* Make sure the instructions are ordered with lower memory access first. */
13073 if (offsets[0] > offsets[1])
13075 gap = offsets[0] - offsets[1];
13076 offset = offsets[1];
13078 /* Swap the instructions such that lower memory is accessed first. */
13079 SWAP_RTX (operands[0], operands[1]);
13080 SWAP_RTX (operands[2], operands[3]);
13081 if (const_store)
13082 SWAP_RTX (operands[4], operands[5]);
13084 else
13086 gap = offsets[1] - offsets[0];
13087 offset = offsets[0];
13090 /* Make sure accesses are to consecutive memory locations. */
13091 if (gap != 4)
13092 return false;
13094 /* Make sure we generate legal instructions. */
13095 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13096 false, load))
13097 return true;
13099 /* In Thumb state the registers are almost unconstrained, so if the check
13100 above failed there is little hope of fixing it by renaming registers. */
13101 if (TARGET_THUMB2)
13102 return false;
13104 if (load && commute)
13106 /* Try reordering registers. */
13107 SWAP_RTX (operands[0], operands[1]);
13108 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13109 false, load))
13110 return true;
13113 if (const_store)
13115 /* If input registers are dead after this pattern, they can be
13116 reordered or replaced by other registers that are free in the
13117 current pattern. */
13118 if (!peep2_reg_dead_p (4, operands[0])
13119 || !peep2_reg_dead_p (4, operands[1]))
13120 return false;
13122 /* Try to reorder the input registers. */
13123 /* For example, the code
13124 mov r0, 0
13125 mov r1, 1
13126 str r1, [r2]
13127 str r0, [r2, #4]
13128 can be transformed into
13129 mov r1, 0
13130 mov r0, 1
13131 strd r0, [r2]
13133 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
13134 false, false))
13136 SWAP_RTX (operands[0], operands[1]);
13137 return true;
13140 /* Try to find a free DI register. */
13141 CLEAR_HARD_REG_SET (regset);
13142 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
13143 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
13144 while (true)
13146 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13147 if (tmp == NULL_RTX)
13148 return false;
13150 /* DREG must be an even-numbered register in DImode.
13151 Split it into SI registers. */
13152 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13153 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13154 gcc_assert (operands[0] != NULL_RTX);
13155 gcc_assert (operands[1] != NULL_RTX);
13156 gcc_assert (REGNO (operands[0]) % 2 == 0);
13157 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
13159 return (operands_ok_ldrd_strd (operands[0], operands[1],
13160 base, offset,
13161 false, load));
13165 return false;
13167 #undef SWAP_RTX
13172 /* Print a symbolic form of X to the debug file, F. */
13173 static void
13174 arm_print_value (FILE *f, rtx x)
13176 switch (GET_CODE (x))
13178 case CONST_INT:
13179 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
13180 return;
13182 case CONST_DOUBLE:
13183 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
13184 return;
13186 case CONST_VECTOR:
13188 int i;
13190 fprintf (f, "<");
13191 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
13193 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
13194 if (i < (CONST_VECTOR_NUNITS (x) - 1))
13195 fputc (',', f);
13197 fprintf (f, ">");
13199 return;
13201 case CONST_STRING:
13202 fprintf (f, "\"%s\"", XSTR (x, 0));
13203 return;
13205 case SYMBOL_REF:
13206 fprintf (f, "`%s'", XSTR (x, 0));
13207 return;
13209 case LABEL_REF:
13210 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
13211 return;
13213 case CONST:
13214 arm_print_value (f, XEXP (x, 0));
13215 return;
13217 case PLUS:
13218 arm_print_value (f, XEXP (x, 0));
13219 fprintf (f, "+");
13220 arm_print_value (f, XEXP (x, 1));
13221 return;
13223 case PC:
13224 fprintf (f, "pc");
13225 return;
13227 default:
13228 fprintf (f, "????");
13229 return;
13233 /* Routines for manipulation of the constant pool. */
13235 /* Arm instructions cannot load a large constant directly into a
13236 register; they have to come from a pc relative load. The constant
13237 must therefore be placed in the addressable range of the pc
13238 relative load. Depending on the precise pc relative load
13239 instruction the range is somewhere between 256 bytes and 4k. This
13240 means that we often have to dump a constant inside a function, and
13241 generate code to branch around it.
13243 It is important to minimize this, since the branches will slow
13244 things down and make the code larger.
13246 Normally we can hide the table after an existing unconditional
13247 branch so that there is no interruption of the flow, but in the
13248 worst case the code looks like this:
13250 ldr rn, L1
13252 b L2
13253 align
13254 L1: .long value
13258 ldr rn, L3
13260 b L4
13261 align
13262 L3: .long value
13266 We fix this by performing a scan after scheduling, which notices
13267 which instructions need to have their operands fetched from the
13268 constant table and builds the table.
13270 The algorithm starts by building a table of all the constants that
13271 need fixing up and all the natural barriers in the function (places
13272 where a constant table can be dropped without breaking the flow).
13273 For each fixup we note how far the pc-relative replacement will be
13274 able to reach and the offset of the instruction into the function.
13276 Having built the table we then group the fixes together to form
13277 tables that are as large as possible (subject to addressing
13278 constraints) and emit each table of constants after the last
13279 barrier that is within range of all the instructions in the group.
13280 If a group does not contain a barrier, then we forcibly create one
13281 by inserting a jump instruction into the flow. Once the table has
13282 been inserted, the insns are then modified to reference the
13283 relevant entry in the pool.
13285 Possible enhancements to the algorithm (not implemented) are:
13287 1) For some processors and object formats, there may be benefit in
13288 aligning the pools to the start of cache lines; this alignment
13289 would need to be taken into account when calculating addressability
13290 of a pool. */
13292 /* These typedefs are located at the start of this file, so that
13293 they can be used in the prototypes there. This comment is to
13294 remind readers of that fact so that the following structures
13295 can be understood more easily.
13297 typedef struct minipool_node Mnode;
13298 typedef struct minipool_fixup Mfix; */
13300 struct minipool_node
13302 /* Doubly linked chain of entries. */
13303 Mnode * next;
13304 Mnode * prev;
13305 /* The maximum offset into the code that this entry can be placed. While
13306 pushing fixes for forward references, all entries are sorted in order
13307 of increasing max_address. */
13308 HOST_WIDE_INT max_address;
13309 /* Similarly for an entry inserted for a backwards ref. */
13310 HOST_WIDE_INT min_address;
13311 /* The number of fixes referencing this entry. This can become zero
13312 if we "unpush" an entry. In this case we ignore the entry when we
13313 come to emit the code. */
13314 int refcount;
13315 /* The offset from the start of the minipool. */
13316 HOST_WIDE_INT offset;
13317 /* The value in table. */
13318 rtx value;
13319 /* The mode of value. */
13320 enum machine_mode mode;
13321 /* The size of the value. With iWMMXt enabled
13322 sizes > 4 also imply an alignment of 8-bytes. */
13323 int fix_size;
13326 struct minipool_fixup
13328 Mfix * next;
13329 rtx insn;
13330 HOST_WIDE_INT address;
13331 rtx * loc;
13332 enum machine_mode mode;
13333 int fix_size;
13334 rtx value;
13335 Mnode * minipool;
13336 HOST_WIDE_INT forwards;
13337 HOST_WIDE_INT backwards;
13340 /* Fixes less than a word need padding out to a word boundary. */
13341 #define MINIPOOL_FIX_SIZE(mode) \
13342 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
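/* E.g. (illustrative): an HImode fix still occupies 4 bytes in the pool,
   while a DImode fix occupies 8.  */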
13344 static Mnode * minipool_vector_head;
13345 static Mnode * minipool_vector_tail;
13346 static rtx minipool_vector_label;
13347 static int minipool_pad;
13349 /* The linked list of all minipool fixes required for this function. */
13350 Mfix * minipool_fix_head;
13351 Mfix * minipool_fix_tail;
13352 /* The fix entry for the current minipool, once it has been placed. */
13353 Mfix * minipool_barrier;
13355 /* Determines if INSN is the start of a jump table. Returns the end
13356 of the TABLE or NULL_RTX. */
13357 static rtx
13358 is_jump_table (rtx insn)
13360 rtx table;
13362 if (jump_to_label_p (insn)
13363 && ((table = next_real_insn (JUMP_LABEL (insn)))
13364 == next_real_insn (insn))
13365 && table != NULL
13366 && JUMP_P (table)
13367 && (GET_CODE (PATTERN (table)) == ADDR_VEC
13368 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
13369 return table;
13371 return NULL_RTX;
13374 #ifndef JUMP_TABLES_IN_TEXT_SECTION
13375 #define JUMP_TABLES_IN_TEXT_SECTION 0
13376 #endif
13378 static HOST_WIDE_INT
13379 get_jump_table_size (rtx insn)
13381 /* ADDR_VECs only take room if read-only data goes into the text
13382 section. */
13383 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
13385 rtx body = PATTERN (insn);
13386 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
13387 HOST_WIDE_INT size;
13388 HOST_WIDE_INT modesize;
13390 modesize = GET_MODE_SIZE (GET_MODE (body));
13391 size = modesize * XVECLEN (body, elt);
13392 switch (modesize)
13394 case 1:
13395 /* Round up size of TBB table to a halfword boundary. */
13396 size = (size + 1) & ~(HOST_WIDE_INT)1;
13397 break;
13398 case 2:
13399 /* No padding necessary for TBH. */
13400 break;
13401 case 4:
13402 /* Add two bytes for alignment on Thumb. */
13403 if (TARGET_THUMB)
13404 size += 2;
13405 break;
13406 default:
13407 gcc_unreachable ();
13409 return size;
13412 return 0;
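/* Worked example for the size calculation above (hypothetical table): a
   QImode ADDR_DIFF_VEC with 5 entries (a Thumb-2 TBB table) measures
   5 * 1 = 5 bytes and is rounded up to 6 to restore halfword alignment;
   the same table in HImode (TBH) is exactly 2 * 5 = 10 bytes with no
   padding needed.  */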
13415 /* Return the maximum amount of padding that will be inserted before
13416 label LABEL. */
13418 static HOST_WIDE_INT
13419 get_label_padding (rtx label)
13421 HOST_WIDE_INT align, min_insn_size;
13423 align = 1 << label_to_alignment (label);
13424 min_insn_size = TARGET_THUMB ? 2 : 4;
13425 return align > min_insn_size ? align - min_insn_size : 0;
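/* For example (hypothetical values): a label aligned to 1 << 3 = 8 bytes
   in Thumb code, where the minimum insn size is 2, can be preceded by at
   most 8 - 2 = 6 bytes of padding; with 4-byte alignment in ARM code the
   result is 0, because every ARM insn is already 4 bytes long.  */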
13428 /* Move a minipool fix MP from its current location to before MAX_MP.
13429 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
13430 constraints may need updating. */
13431 static Mnode *
13432 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
13433 HOST_WIDE_INT max_address)
13435 /* The code below assumes these are different. */
13436 gcc_assert (mp != max_mp);
13438 if (max_mp == NULL)
13440 if (max_address < mp->max_address)
13441 mp->max_address = max_address;
13443 else
13445 if (max_address > max_mp->max_address - mp->fix_size)
13446 mp->max_address = max_mp->max_address - mp->fix_size;
13447 else
13448 mp->max_address = max_address;
13450 /* Unlink MP from its current position. Since max_mp is non-null,
13451 mp->prev must be non-null. */
13452 mp->prev->next = mp->next;
13453 if (mp->next != NULL)
13454 mp->next->prev = mp->prev;
13455 else
13456 minipool_vector_tail = mp->prev;
13458 /* Re-insert it before MAX_MP. */
13459 mp->next = max_mp;
13460 mp->prev = max_mp->prev;
13461 max_mp->prev = mp;
13463 if (mp->prev != NULL)
13464 mp->prev->next = mp;
13465 else
13466 minipool_vector_head = mp;
13469 /* Save the new entry. */
13470 max_mp = mp;
13472 /* Scan over the preceding entries and adjust their addresses as
13473 required. */
13474 while (mp->prev != NULL
13475 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13477 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13478 mp = mp->prev;
13481 return max_mp;
13484 /* Add a constant to the minipool for a forward reference. Returns the
13485 node added or NULL if the constant will not fit in this pool. */
13486 static Mnode *
13487 add_minipool_forward_ref (Mfix *fix)
13489 /* If set, max_mp is the first pool_entry that has a lower
13490 constraint than the one we are trying to add. */
13491 Mnode * max_mp = NULL;
13492 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
13493 Mnode * mp;
13495 /* If the minipool starts before the end of FIX->INSN then this FIX
13496 cannot be placed into the current pool. Furthermore, adding the
13497 new constant pool entry may cause the pool to start FIX_SIZE bytes
13498 earlier. */
13499 if (minipool_vector_head &&
13500 (fix->address + get_attr_length (fix->insn)
13501 >= minipool_vector_head->max_address - fix->fix_size))
13502 return NULL;
13504 /* Scan the pool to see if a constant with the same value has
13505 already been added. While we are doing this, also note the
13506 location where we must insert the constant if it doesn't already
13507 exist. */
13508 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13510 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13511 && fix->mode == mp->mode
13512 && (!LABEL_P (fix->value)
13513 || (CODE_LABEL_NUMBER (fix->value)
13514 == CODE_LABEL_NUMBER (mp->value)))
13515 && rtx_equal_p (fix->value, mp->value))
13517 /* More than one fix references this entry. */
13518 mp->refcount++;
13519 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
13522 /* Note the insertion point if necessary. */
13523 if (max_mp == NULL
13524 && mp->max_address > max_address)
13525 max_mp = mp;
13527 /* If we are inserting an 8-byte aligned quantity and
13528 we have not already found an insertion point, then
13529 make sure that all such 8-byte aligned quantities are
13530 placed at the start of the pool. */
13531 if (ARM_DOUBLEWORD_ALIGN
13532 && max_mp == NULL
13533 && fix->fix_size >= 8
13534 && mp->fix_size < 8)
13536 max_mp = mp;
13537 max_address = mp->max_address;
13541 /* The value is not currently in the minipool, so we need to create
13542 a new entry for it. If MAX_MP is NULL, the entry will be put on
13543 the end of the list since the placement is less constrained than
13544 any existing entry. Otherwise, we insert the new fix before
13545 MAX_MP and, if necessary, adjust the constraints on the other
13546 entries. */
13547 mp = XNEW (Mnode);
13548 mp->fix_size = fix->fix_size;
13549 mp->mode = fix->mode;
13550 mp->value = fix->value;
13551 mp->refcount = 1;
13552 /* Not yet required for a backwards ref. */
13553 mp->min_address = -65536;
13555 if (max_mp == NULL)
13557 mp->max_address = max_address;
13558 mp->next = NULL;
13559 mp->prev = minipool_vector_tail;
13561 if (mp->prev == NULL)
13563 minipool_vector_head = mp;
13564 minipool_vector_label = gen_label_rtx ();
13566 else
13567 mp->prev->next = mp;
13569 minipool_vector_tail = mp;
13571 else
13573 if (max_address > max_mp->max_address - mp->fix_size)
13574 mp->max_address = max_mp->max_address - mp->fix_size;
13575 else
13576 mp->max_address = max_address;
13578 mp->next = max_mp;
13579 mp->prev = max_mp->prev;
13580 max_mp->prev = mp;
13581 if (mp->prev != NULL)
13582 mp->prev->next = mp;
13583 else
13584 minipool_vector_head = mp;
13587 /* Save the new entry. */
13588 max_mp = mp;
13590 /* Scan over the preceding entries and adjust their addresses as
13591 required. */
13592 while (mp->prev != NULL
13593 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13595 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13596 mp = mp->prev;
13599 return max_mp;
13602 static Mnode *
13603 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13604 HOST_WIDE_INT min_address)
13606 HOST_WIDE_INT offset;
13608 /* The code below assumes these are different. */
13609 gcc_assert (mp != min_mp);
13611 if (min_mp == NULL)
13613 if (min_address > mp->min_address)
13614 mp->min_address = min_address;
13616 else
13618 /* We will adjust this below if it is too loose. */
13619 mp->min_address = min_address;
13621 /* Unlink MP from its current position. Since min_mp is non-null,
13622 mp->next must be non-null. */
13623 mp->next->prev = mp->prev;
13624 if (mp->prev != NULL)
13625 mp->prev->next = mp->next;
13626 else
13627 minipool_vector_head = mp->next;
13629 /* Reinsert it after MIN_MP. */
13630 mp->prev = min_mp;
13631 mp->next = min_mp->next;
13632 min_mp->next = mp;
13633 if (mp->next != NULL)
13634 mp->next->prev = mp;
13635 else
13636 minipool_vector_tail = mp;
13639 min_mp = mp;
13641 offset = 0;
13642 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13644 mp->offset = offset;
13645 if (mp->refcount > 0)
13646 offset += mp->fix_size;
13648 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13649 mp->next->min_address = mp->min_address + mp->fix_size;
13652 return min_mp;
13655 /* Add a constant to the minipool for a backward reference. Returns the
13656 node added or NULL if the constant will not fit in this pool.
13658 Note that the code for insertion for a backwards reference can be
13659 somewhat confusing because the calculated offsets for each fix do
13660 not take into account the size of the pool (which is still under
13661 construction). */
13662 static Mnode *
13663 add_minipool_backward_ref (Mfix *fix)
13665 /* If set, min_mp is the last pool_entry that has a lower constraint
13666 than the one we are trying to add. */
13667 Mnode *min_mp = NULL;
13668 /* This can be negative, since it is only a constraint. */
13669 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13670 Mnode *mp;
13672 /* If we can't reach the current pool from this insn, or if we can't
13673 insert this entry at the end of the pool without pushing other
13674 fixes out of range, then we don't try. This ensures that we
13675 can't fail later on. */
13676 if (min_address >= minipool_barrier->address
13677 || (minipool_vector_tail->min_address + fix->fix_size
13678 >= minipool_barrier->address))
13679 return NULL;
13681 /* Scan the pool to see if a constant with the same value has
13682 already been added. While we are doing this, also note the
13683 location where we must insert the constant if it doesn't already
13684 exist. */
13685 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13687 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13688 && fix->mode == mp->mode
13689 && (!LABEL_P (fix->value)
13690 || (CODE_LABEL_NUMBER (fix->value)
13691 == CODE_LABEL_NUMBER (mp->value)))
13692 && rtx_equal_p (fix->value, mp->value)
13693 /* Check that there is enough slack to move this entry to the
13694 end of the table (this is conservative). */
13695 && (mp->max_address
13696 > (minipool_barrier->address
13697 + minipool_vector_tail->offset
13698 + minipool_vector_tail->fix_size)))
13700 mp->refcount++;
13701 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13704 if (min_mp != NULL)
13705 mp->min_address += fix->fix_size;
13706 else
13708 /* Note the insertion point if necessary. */
13709 if (mp->min_address < min_address)
13711 /* For now, we do not allow the insertion of nodes requiring 8-byte
13712 alignment anywhere but at the start of the pool. */
13713 if (ARM_DOUBLEWORD_ALIGN
13714 && fix->fix_size >= 8 && mp->fix_size < 8)
13715 return NULL;
13716 else
13717 min_mp = mp;
13719 else if (mp->max_address
13720 < minipool_barrier->address + mp->offset + fix->fix_size)
13722 /* Inserting before this entry would push the fix beyond
13723 its maximum address (which can happen if we have
13724 re-located a forwards fix); force the new fix to come
13725 after it. */
13726 if (ARM_DOUBLEWORD_ALIGN
13727 && fix->fix_size >= 8 && mp->fix_size < 8)
13728 return NULL;
13729 else
13731 min_mp = mp;
13732 min_address = mp->min_address + fix->fix_size;
13735 /* Do not insert a non-8-byte aligned quantity before 8-byte
13736 aligned quantities. */
13737 else if (ARM_DOUBLEWORD_ALIGN
13738 && fix->fix_size < 8
13739 && mp->fix_size >= 8)
13741 min_mp = mp;
13742 min_address = mp->min_address + fix->fix_size;
13747 /* We need to create a new entry. */
13748 mp = XNEW (Mnode);
13749 mp->fix_size = fix->fix_size;
13750 mp->mode = fix->mode;
13751 mp->value = fix->value;
13752 mp->refcount = 1;
13753 mp->max_address = minipool_barrier->address + 65536;
13755 mp->min_address = min_address;
13757 if (min_mp == NULL)
13759 mp->prev = NULL;
13760 mp->next = minipool_vector_head;
13762 if (mp->next == NULL)
13764 minipool_vector_tail = mp;
13765 minipool_vector_label = gen_label_rtx ();
13767 else
13768 mp->next->prev = mp;
13770 minipool_vector_head = mp;
13772 else
13774 mp->next = min_mp->next;
13775 mp->prev = min_mp;
13776 min_mp->next = mp;
13778 if (mp->next != NULL)
13779 mp->next->prev = mp;
13780 else
13781 minipool_vector_tail = mp;
13784 /* Save the new entry. */
13785 min_mp = mp;
13787 if (mp->prev)
13788 mp = mp->prev;
13789 else
13790 mp->offset = 0;
13792 /* Scan over the following entries and adjust their offsets. */
13793 while (mp->next != NULL)
13795 if (mp->next->min_address < mp->min_address + mp->fix_size)
13796 mp->next->min_address = mp->min_address + mp->fix_size;
13798 if (mp->refcount)
13799 mp->next->offset = mp->offset + mp->fix_size;
13800 else
13801 mp->next->offset = mp->offset;
13803 mp = mp->next;
13806 return min_mp;
13809 static void
13810 assign_minipool_offsets (Mfix *barrier)
13812 HOST_WIDE_INT offset = 0;
13813 Mnode *mp;
13815 minipool_barrier = barrier;
13817 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13819 mp->offset = offset;
13821 if (mp->refcount > 0)
13822 offset += mp->fix_size;
13826 /* Output the literal table.  */
13827 static void
13828 dump_minipool (rtx scan)
13830 Mnode * mp;
13831 Mnode * nmp;
13832 int align64 = 0;
13834 if (ARM_DOUBLEWORD_ALIGN)
13835 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13836 if (mp->refcount > 0 && mp->fix_size >= 8)
13838 align64 = 1;
13839 break;
13842 if (dump_file)
13843 fprintf (dump_file,
13844 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13845 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
13847 scan = emit_label_after (gen_label_rtx (), scan);
13848 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13849 scan = emit_label_after (minipool_vector_label, scan);
13851 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13853 if (mp->refcount > 0)
13855 if (dump_file)
13857 fprintf (dump_file,
13858 ";; Offset %u, min %ld, max %ld ",
13859 (unsigned) mp->offset, (unsigned long) mp->min_address,
13860 (unsigned long) mp->max_address);
13861 arm_print_value (dump_file, mp->value);
13862 fputc ('\n', dump_file);
13865 switch (mp->fix_size)
13867 #ifdef HAVE_consttable_1
13868 case 1:
13869 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13870 break;
13872 #endif
13873 #ifdef HAVE_consttable_2
13874 case 2:
13875 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13876 break;
13878 #endif
13879 #ifdef HAVE_consttable_4
13880 case 4:
13881 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13882 break;
13884 #endif
13885 #ifdef HAVE_consttable_8
13886 case 8:
13887 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13888 break;
13890 #endif
13891 #ifdef HAVE_consttable_16
13892 case 16:
13893 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13894 break;
13896 #endif
13897 default:
13898 gcc_unreachable ();
13902 nmp = mp->next;
13903 free (mp);
13906 minipool_vector_head = minipool_vector_tail = NULL;
13907 scan = emit_insn_after (gen_consttable_end (), scan);
13908 scan = emit_barrier_after (scan);
13911 /* Return the cost of forcibly inserting a barrier after INSN. */
13912 static int
13913 arm_barrier_cost (rtx insn)
13915 /* Basing the location of the pool on the loop depth is preferable,
13916 but at the moment, the basic block information seems to be
13917 corrupt by this stage of the compilation. */
13918 int base_cost = 50;
13919 rtx next = next_nonnote_insn (insn);
13921 if (next != NULL && LABEL_P (next))
13922 base_cost -= 20;
13924 switch (GET_CODE (insn))
13926 case CODE_LABEL:
13927 /* It will always be better to place the table before the label, rather
13928 than after it. */
13929 return 50;
13931 case INSN:
13932 case CALL_INSN:
13933 return base_cost;
13935 case JUMP_INSN:
13936 return base_cost - 10;
13938 default:
13939 return base_cost + 10;
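/* Worked example of the heuristic above (illustrative): a JUMP_INSN that
   is immediately followed by a CODE_LABEL costs 50 - 20 - 10 = 20, so it
   is preferred over an ordinary INSN in the middle of a block, which
   costs the full base of 50.  */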
13943 /* Find the best place in the insn stream in the range
13944 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13945 Create the barrier by inserting a jump and add a new fix entry for
13946 it. */
13947 static Mfix *
13948 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13950 HOST_WIDE_INT count = 0;
13951 rtx barrier;
13952 rtx from = fix->insn;
13953 /* The instruction after which we will insert the jump. */
13954 rtx selected = NULL;
13955 int selected_cost;
13956 /* The address at which the jump instruction will be placed. */
13957 HOST_WIDE_INT selected_address;
13958 Mfix * new_fix;
13959 HOST_WIDE_INT max_count = max_address - fix->address;
13960 rtx label = gen_label_rtx ();
13962 selected_cost = arm_barrier_cost (from);
13963 selected_address = fix->address;
13965 while (from && count < max_count)
13967 rtx tmp;
13968 int new_cost;
13970 /* This code shouldn't have been called if there was a natural barrier
13971 within range. */
13972 gcc_assert (!BARRIER_P (from));
13974 /* Count the length of this insn. This must stay in sync with the
13975 code that pushes minipool fixes. */
13976 if (LABEL_P (from))
13977 count += get_label_padding (from);
13978 else
13979 count += get_attr_length (from);
13981 /* If there is a jump table, add its length. */
13982 tmp = is_jump_table (from);
13983 if (tmp != NULL)
13985 count += get_jump_table_size (tmp);
13987 /* Jump tables aren't in a basic block, so base the cost on
13988 the dispatch insn. If we select this location, we will
13989 still put the pool after the table. */
13990 new_cost = arm_barrier_cost (from);
13992 if (count < max_count
13993 && (!selected || new_cost <= selected_cost))
13995 selected = tmp;
13996 selected_cost = new_cost;
13997 selected_address = fix->address + count;
14000 /* Continue after the dispatch table. */
14001 from = NEXT_INSN (tmp);
14002 continue;
14005 new_cost = arm_barrier_cost (from);
14007 if (count < max_count
14008 && (!selected || new_cost <= selected_cost))
14010 selected = from;
14011 selected_cost = new_cost;
14012 selected_address = fix->address + count;
14015 from = NEXT_INSN (from);
14018 /* Make sure that we found a place to insert the jump. */
14019 gcc_assert (selected);
14021 /* Make sure we do not split a call and its corresponding
14022 CALL_ARG_LOCATION note. */
14023 if (CALL_P (selected))
14025 rtx next = NEXT_INSN (selected);
14026 if (next && NOTE_P (next)
14027 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
14028 selected = next;
14031 /* Create a new JUMP_INSN that branches around a barrier. */
14032 from = emit_jump_insn_after (gen_jump (label), selected);
14033 JUMP_LABEL (from) = label;
14034 barrier = emit_barrier_after (from);
14035 emit_label_after (label, barrier);
14037 /* Create a minipool barrier entry for the new barrier. */
14038 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
14039 new_fix->insn = barrier;
14040 new_fix->address = selected_address;
14041 new_fix->next = fix->next;
14042 fix->next = new_fix;
14044 return new_fix;
14047 /* Record that there is a natural barrier in the insn stream at
14048 ADDRESS. */
14049 static void
14050 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
14052 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
14054 fix->insn = insn;
14055 fix->address = address;
14057 fix->next = NULL;
14058 if (minipool_fix_head != NULL)
14059 minipool_fix_tail->next = fix;
14060 else
14061 minipool_fix_head = fix;
14063 minipool_fix_tail = fix;
14066 /* Record INSN, which will need fixing up to load a value from the
14067 minipool. ADDRESS is the offset of the insn since the start of the
14068 function; LOC is a pointer to the part of the insn which requires
14069 fixing; VALUE is the constant that must be loaded, which is of type
14070 MODE. */
14071 static void
14072 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
14073 enum machine_mode mode, rtx value)
14075 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
14077 fix->insn = insn;
14078 fix->address = address;
14079 fix->loc = loc;
14080 fix->mode = mode;
14081 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
14082 fix->value = value;
14083 fix->forwards = get_attr_pool_range (insn);
14084 fix->backwards = get_attr_neg_pool_range (insn);
14085 fix->minipool = NULL;
14087 /* If an insn doesn't have a range defined for it, then it isn't
14088 expecting to be reworked by this code. Better to stop now than
14089 to generate duff assembly code. */
14090 gcc_assert (fix->forwards || fix->backwards);
14092 /* If an entry requires 8-byte alignment then assume all constant pools
14093 require 4 bytes of padding. Trying to do this later on a per-pool
14094 basis is awkward because existing pool entries have to be modified. */
14095 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
14096 minipool_pad = 4;
14098 if (dump_file)
14100 fprintf (dump_file,
14101 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
14102 GET_MODE_NAME (mode),
14103 INSN_UID (insn), (unsigned long) address,
14104 -1 * (long)fix->backwards, (long)fix->forwards);
14105 arm_print_value (dump_file, fix->value);
14106 fprintf (dump_file, "\n");
14109 /* Add it to the chain of fixes. */
14110 fix->next = NULL;
14112 if (minipool_fix_head != NULL)
14113 minipool_fix_tail->next = fix;
14114 else
14115 minipool_fix_head = fix;
14117 minipool_fix_tail = fix;
14120 /* Return the cost of synthesizing a 64-bit constant VAL inline.
14121 Returns the number of insns needed, or 99 if we don't know how to
14122 do it. */
14123 int
14124 arm_const_double_inline_cost (rtx val)
14126 rtx lowpart, highpart;
14127 enum machine_mode mode;
14129 mode = GET_MODE (val);
14131 if (mode == VOIDmode)
14132 mode = DImode;
14134 gcc_assert (GET_MODE_SIZE (mode) == 8);
14136 lowpart = gen_lowpart (SImode, val);
14137 highpart = gen_highpart_mode (SImode, mode, val);
14139 gcc_assert (CONST_INT_P (lowpart));
14140 gcc_assert (CONST_INT_P (highpart));
14142 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
14143 NULL_RTX, NULL_RTX, 0, 0)
14144 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
14145 NULL_RTX, NULL_RTX, 0, 0));
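/* As a rough illustration (the exact counts depend on arm_gen_constant
   and the target tuning): a constant such as 0x0000000100000001, whose
   halves are both valid immediates, costs 2 insns, whereas a value whose
   halves each need a multi-insn sequence can cost 4 or more and may then
   be better loaded from the literal pool instead.  */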
14148 /* Return true if it is worthwhile to split a 64-bit constant into two
14149 32-bit operations. This is the case if optimizing for size, or
14150 if we have load delay slots, or if one 32-bit part can be done with
14151 a single data operation. */
14152 bool
14153 arm_const_double_by_parts (rtx val)
14155 enum machine_mode mode = GET_MODE (val);
14156 rtx part;
14158 if (optimize_size || arm_ld_sched)
14159 return true;
14161 if (mode == VOIDmode)
14162 mode = DImode;
14164 part = gen_highpart_mode (SImode, mode, val);
14166 gcc_assert (CONST_INT_P (part));
14168 if (const_ok_for_arm (INTVAL (part))
14169 || const_ok_for_arm (~INTVAL (part)))
14170 return true;
14172 part = gen_lowpart (SImode, val);
14174 gcc_assert (CONST_INT_P (part));
14176 if (const_ok_for_arm (INTVAL (part))
14177 || const_ok_for_arm (~INTVAL (part)))
14178 return true;
14180 return false;
14183 /* Return true if it is possible to inline both the high and low parts
14184 of a 64-bit constant into 32-bit data processing instructions. */
14185 bool
14186 arm_const_double_by_immediates (rtx val)
14188 enum machine_mode mode = GET_MODE (val);
14189 rtx part;
14191 if (mode == VOIDmode)
14192 mode = DImode;
14194 part = gen_highpart_mode (SImode, mode, val);
14196 gcc_assert (CONST_INT_P (part));
14198 if (!const_ok_for_arm (INTVAL (part)))
14199 return false;
14201 part = gen_lowpart (SImode, val);
14203 gcc_assert (CONST_INT_P (part));
14205 if (!const_ok_for_arm (INTVAL (part)))
14206 return false;
14208 return true;
14211 /* Scan INSN and note any of its operands that need fixing.
14212 If DO_PUSHES is false we do not actually push any of the fixups
14213 needed. */
14214 static void
14215 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
14217 int opno;
14219 extract_insn (insn);
14221 if (!constrain_operands (1))
14222 fatal_insn_not_found (insn);
14224 if (recog_data.n_alternatives == 0)
14225 return;
14227 /* Fill in recog_op_alt with information about the constraints of
14228 this insn. */
14229 preprocess_constraints ();
14231 for (opno = 0; opno < recog_data.n_operands; opno++)
14233 /* Things we need to fix can only occur in inputs. */
14234 if (recog_data.operand_type[opno] != OP_IN)
14235 continue;
14237 /* If this alternative is a memory reference, then any mention
14238 of constants in this alternative is really to fool reload
14239 into allowing us to accept one there. We need to fix them up
14240 now so that we output the right code. */
14241 if (recog_op_alt[opno][which_alternative].memory_ok)
14243 rtx op = recog_data.operand[opno];
14245 if (CONSTANT_P (op))
14247 if (do_pushes)
14248 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
14249 recog_data.operand_mode[opno], op);
14251 else if (MEM_P (op)
14252 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
14253 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
14255 if (do_pushes)
14257 rtx cop = avoid_constant_pool_reference (op);
14259 /* Casting the address of something to a mode narrower
14260 than a word can cause avoid_constant_pool_reference()
14261 to return the pool reference itself. That's no good to
14262 us here. Let's just hope that we can use the
14263 constant pool value directly. */
14264 if (op == cop)
14265 cop = get_pool_constant (XEXP (op, 0));
14267 push_minipool_fix (insn, address,
14268 recog_data.operand_loc[opno],
14269 recog_data.operand_mode[opno], cop);
14276 return;
14279 /* Rewrite move insn into subtract of 0 if the condition codes will
14280 be useful in the next conditional jump insn. */
14282 static void
14283 thumb1_reorg (void)
14285 basic_block bb;
14287 FOR_EACH_BB (bb)
14289 rtx set, dest, src;
14290 rtx pat, op0;
14291 rtx prev, insn = BB_END (bb);
14293 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
14294 insn = PREV_INSN (insn);
14296 /* Find the last cbranchsi4_insn in basic block BB. */
14297 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
14298 continue;
14300 /* Find the first non-note insn before INSN in basic block BB. */
14301 gcc_assert (insn != BB_HEAD (bb));
14302 prev = PREV_INSN (insn);
14303 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
14304 prev = PREV_INSN (prev);
14306 set = single_set (prev);
14307 if (!set)
14308 continue;
14310 dest = SET_DEST (set);
14311 src = SET_SRC (set);
14312 if (!low_register_operand (dest, SImode)
14313 || !low_register_operand (src, SImode))
14314 continue;
14316 pat = PATTERN (insn);
14317 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
14318 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
14319 in INSN. Don't need to check dest since cprop_hardreg pass propagates
14320 src into INSN. */
14321 if (REGNO (op0) == REGNO (src))
14323 dest = copy_rtx (dest);
14324 src = copy_rtx (src);
14325 src = gen_rtx_MINUS (SImode, src, const0_rtx);
14326 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
14327 INSN_CODE (prev) = -1;
14328 /* Set test register in INSN to dest. */
14329 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
14330 INSN_CODE (insn) = -1;
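/* Sketch of the intended effect of the transformation above (illustrative
   registers): a move such as
       movs  r1, r2          @ r2 later compared against zero
   is rewritten as
       subs  r1, r2, #0
   so that the flags set by the subtract allow the following conditional
   branch to drop its explicit compare.  */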
14335 /* Convert instructions to their cc-clobbering variant if possible, since
14336 that allows us to use smaller encodings. */
14338 static void
14339 thumb2_reorg (void)
14341 basic_block bb;
14342 regset_head live;
14344 INIT_REG_SET (&live);
14346 /* We are freeing block_for_insn in the toplev to keep compatibility
14347 with old MDEP_REORGS that are not CFG based. Recompute it now. */
14348 compute_bb_for_insn ();
14349 df_analyze ();
14351 FOR_EACH_BB (bb)
14353 rtx insn;
14355 COPY_REG_SET (&live, DF_LR_OUT (bb));
14356 df_simulate_initialize_backwards (bb, &live);
14357 FOR_BB_INSNS_REVERSE (bb, insn)
14359 if (NONJUMP_INSN_P (insn)
14360 && !REGNO_REG_SET_P (&live, CC_REGNUM)
14361 && GET_CODE (PATTERN (insn)) == SET)
14363 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
14364 rtx pat = PATTERN (insn);
14365 rtx dst = XEXP (pat, 0);
14366 rtx src = XEXP (pat, 1);
14367 rtx op0 = NULL_RTX, op1 = NULL_RTX;
14369 if (!OBJECT_P (src))
14370 op0 = XEXP (src, 0);
14372 if (BINARY_P (src))
14373 op1 = XEXP (src, 1);
14375 if (low_register_operand (dst, SImode))
14377 switch (GET_CODE (src))
14379 case PLUS:
14380 /* Adding two registers and storing the result
14381 in the first source is already a 16-bit
14382 operation. */
14383 if (rtx_equal_p (dst, op0)
14384 && register_operand (op1, SImode))
14385 break;
14387 if (low_register_operand (op0, SImode))
14389 /* ADDS <Rd>,<Rn>,<Rm> */
14390 if (low_register_operand (op1, SImode))
14391 action = CONV;
14392 /* ADDS <Rdn>,#<imm8> */
14393 /* SUBS <Rdn>,#<imm8> */
14394 else if (rtx_equal_p (dst, op0)
14395 && CONST_INT_P (op1)
14396 && IN_RANGE (INTVAL (op1), -255, 255))
14397 action = CONV;
14398 /* ADDS <Rd>,<Rn>,#<imm3> */
14399 /* SUBS <Rd>,<Rn>,#<imm3> */
14400 else if (CONST_INT_P (op1)
14401 && IN_RANGE (INTVAL (op1), -7, 7))
14402 action = CONV;
14404 /* ADCS <Rd>, <Rn> */
14405 else if (GET_CODE (XEXP (src, 0)) == PLUS
14406 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
14407 && low_register_operand (XEXP (XEXP (src, 0), 1),
14408 SImode)
14409 && COMPARISON_P (op1)
14410 && cc_register (XEXP (op1, 0), VOIDmode)
14411 && maybe_get_arm_condition_code (op1) == ARM_CS
14412 && XEXP (op1, 1) == const0_rtx)
14413 action = CONV;
14414 break;
14416 case MINUS:
14417 /* RSBS <Rd>,<Rn>,#0
14418 Not handled here: see NEG below. */
14419 /* SUBS <Rd>,<Rn>,#<imm3>
14420 SUBS <Rdn>,#<imm8>
14421 Not handled here: see PLUS above. */
14422 /* SUBS <Rd>,<Rn>,<Rm> */
14423 if (low_register_operand (op0, SImode)
14424 && low_register_operand (op1, SImode))
14425 action = CONV;
14426 break;
14428 case MULT:
14429 /* MULS <Rdm>,<Rn>,<Rdm>
14430 As an exception to the rule, this is only used
14431 when optimizing for size since MULS is slow on all
14432 known implementations. We do not even want to use
14433 MULS in cold code, if optimizing for speed, so we
14434 test the global flag here. */
14435 if (!optimize_size)
14436 break;
14437 /* else fall through. */
14438 case AND:
14439 case IOR:
14440 case XOR:
14441 /* ANDS <Rdn>,<Rm> */
14442 if (rtx_equal_p (dst, op0)
14443 && low_register_operand (op1, SImode))
14444 action = CONV;
14445 else if (rtx_equal_p (dst, op1)
14446 && low_register_operand (op0, SImode))
14447 action = SWAP_CONV;
14448 break;
14450 case ASHIFTRT:
14451 case ASHIFT:
14452 case LSHIFTRT:
14453 /* ASRS <Rdn>,<Rm> */
14454 /* LSRS <Rdn>,<Rm> */
14455 /* LSLS <Rdn>,<Rm> */
14456 if (rtx_equal_p (dst, op0)
14457 && low_register_operand (op1, SImode))
14458 action = CONV;
14459 /* ASRS <Rd>,<Rm>,#<imm5> */
14460 /* LSRS <Rd>,<Rm>,#<imm5> */
14461 /* LSLS <Rd>,<Rm>,#<imm5> */
14462 else if (low_register_operand (op0, SImode)
14463 && CONST_INT_P (op1)
14464 && IN_RANGE (INTVAL (op1), 0, 31))
14465 action = CONV;
14466 break;
14468 case ROTATERT:
14469 /* RORS <Rdn>,<Rm> */
14470 if (rtx_equal_p (dst, op0)
14471 && low_register_operand (op1, SImode))
14472 action = CONV;
14473 break;
14475 case NOT:
14476 case NEG:
14477 /* MVNS <Rd>,<Rm> */
14478 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
14479 if (low_register_operand (op0, SImode))
14480 action = CONV;
14481 break;
14483 case CONST_INT:
14484 /* MOVS <Rd>,#<imm8> */
14485 if (CONST_INT_P (src)
14486 && IN_RANGE (INTVAL (src), 0, 255))
14487 action = CONV;
14488 break;
14490 case REG:
14491 /* MOVS and MOV<c> with registers have different
14492 encodings, so are not relevant here. */
14493 break;
14495 default:
14496 break;
14500 if (action != SKIP)
14502 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
14503 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
14504 rtvec vec;
14506 if (action == SWAP_CONV)
14508 src = copy_rtx (src);
14509 XEXP (src, 0) = op1;
14510 XEXP (src, 1) = op0;
14511 pat = gen_rtx_SET (VOIDmode, dst, src);
14512 vec = gen_rtvec (2, pat, clobber);
14514 else /* action == CONV */
14515 vec = gen_rtvec (2, pat, clobber);
14517 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
14518 INSN_CODE (insn) = -1;
14522 if (NONDEBUG_INSN_P (insn))
14523 df_simulate_one_insn_backwards (bb, insn, &live);
14527 CLEAR_REG_SET (&live);
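/* For example (illustrative): a plain 32-bit  "add r0, r0, r1"  at a point
   where the condition flags are dead can be re-emitted with a CC clobber
   and then assembled as the 16-bit  "adds r0, r0, r1",  saving two bytes
   of encoding.  */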
14530 /* GCC puts the pool in the wrong place for ARM, since we can only
14531 load addresses a limited distance around the pc. We do some
14532 special munging to move the constant pool values to the correct
14533 point in the code. */
14534 static void
14535 arm_reorg (void)
14537 rtx insn;
14538 HOST_WIDE_INT address = 0;
14539 Mfix * fix;
14541 if (TARGET_THUMB1)
14542 thumb1_reorg ();
14543 else if (TARGET_THUMB2)
14544 thumb2_reorg ();
14546 /* Ensure all insns that must be split have been split at this point.
14547 Otherwise, the pool placement code below may compute incorrect
14548 insn lengths. Note that when optimizing, all insns have already
14549 been split at this point. */
14550 if (!optimize)
14551 split_all_insns_noflow ();
14553 minipool_fix_head = minipool_fix_tail = NULL;
14555 /* The first insn must always be a note, or the code below won't
14556 scan it properly. */
14557 insn = get_insns ();
14558 gcc_assert (NOTE_P (insn));
14559 minipool_pad = 0;
14561 /* Scan all the insns and record the operands that will need fixing. */
14562 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14564 if (BARRIER_P (insn))
14565 push_minipool_barrier (insn, address);
14566 else if (INSN_P (insn))
14568 rtx table;
14570 note_invalid_constants (insn, address, true);
14571 address += get_attr_length (insn);
14573 /* If the insn is a vector jump, add the size of the table
14574 and skip the table. */
14575 if ((table = is_jump_table (insn)) != NULL)
14577 address += get_jump_table_size (table);
14578 insn = table;
14581 else if (LABEL_P (insn))
14582 /* Add the worst-case padding due to alignment. We don't add
14583 the _current_ padding because the minipool insertions
14584 themselves might change it. */
14585 address += get_label_padding (insn);
14588 fix = minipool_fix_head;
14590 /* Now scan the fixups and perform the required changes. */
14591 while (fix)
14593 Mfix * ftmp;
14594 Mfix * fdel;
14595 Mfix * last_added_fix;
14596 Mfix * last_barrier = NULL;
14597 Mfix * this_fix;
14599 /* Skip any further barriers before the next fix. */
14600 while (fix && BARRIER_P (fix->insn))
14601 fix = fix->next;
14603 /* No more fixes. */
14604 if (fix == NULL)
14605 break;
14607 last_added_fix = NULL;
14609 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14611 if (BARRIER_P (ftmp->insn))
14613 if (ftmp->address >= minipool_vector_head->max_address)
14614 break;
14616 last_barrier = ftmp;
14618 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14619 break;
14621 last_added_fix = ftmp; /* Keep track of the last fix added. */
14624 /* If we found a barrier, drop back to that; any fixes that we
14625 could have reached but come after the barrier will now go in
14626 the next mini-pool. */
14627 if (last_barrier != NULL)
14629 /* Reduce the refcount for those fixes that won't go into this
14630 pool after all. */
14631 for (fdel = last_barrier->next;
14632 fdel && fdel != ftmp;
14633 fdel = fdel->next)
14635 fdel->minipool->refcount--;
14636 fdel->minipool = NULL;
14639 ftmp = last_barrier;
14641 else
14643 /* ftmp is the first fix that we can't fit into this pool and
14644 there are no natural barriers that we could use. Insert a
14645 new barrier in the code somewhere between the previous
14646 fix and this one, and arrange to jump around it. */
14647 HOST_WIDE_INT max_address;
14649 /* The last item on the list of fixes must be a barrier, so
14650 we can never run off the end of the list of fixes without
14651 last_barrier being set. */
14652 gcc_assert (ftmp);
14654 max_address = minipool_vector_head->max_address;
14655 /* Check that there isn't another fix that is in range that
14656 we couldn't fit into this pool because the pool was
14657 already too large: we need to put the pool before such an
14658 instruction. The pool itself may come just after the
14659 fix because create_fix_barrier also allows space for a
14660 jump instruction. */
14661 if (ftmp->address < max_address)
14662 max_address = ftmp->address + 1;
14664 last_barrier = create_fix_barrier (last_added_fix, max_address);
14667 assign_minipool_offsets (last_barrier);
14669 while (ftmp)
14671 if (!BARRIER_P (ftmp->insn)
14672 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14673 == NULL))
14674 break;
14676 ftmp = ftmp->next;
14679 /* Scan over the fixes we have identified for this pool, fixing them
14680 up and adding the constants to the pool itself. */
14681 for (this_fix = fix; this_fix && ftmp != this_fix;
14682 this_fix = this_fix->next)
14683 if (!BARRIER_P (this_fix->insn))
14685 rtx addr
14686 = plus_constant (Pmode,
14687 gen_rtx_LABEL_REF (VOIDmode,
14688 minipool_vector_label),
14689 this_fix->minipool->offset);
14690 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14693 dump_minipool (last_barrier->insn);
14694 fix = ftmp;
14697 /* From now on we must synthesize any constants that we can't handle
14698 directly. This can happen if the RTL gets split during final
14699 instruction generation. */
14700 after_arm_reorg = 1;
14702 /* Free the minipool memory. */
14703 obstack_free (&minipool_obstack, minipool_startobj);
14706 /* Routines to output assembly language. */
14708 /* If the rtx is the correct value then return the string of the number.
14709 In this way we can ensure that valid double constants are generated even
14710 when cross compiling. */
14711 const char *
14712 fp_immediate_constant (rtx x)
14714 REAL_VALUE_TYPE r;
14716 if (!fp_consts_inited)
14717 init_fp_table ();
14719 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14721 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14722 return "0";
14725 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14726 static const char *
14727 fp_const_from_val (REAL_VALUE_TYPE *r)
14729 if (!fp_consts_inited)
14730 init_fp_table ();
14732 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14733 return "0";
14736 /* OPERANDS[0] is the entire list of insns that constitute pop,
14737 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
14738 is in the list, UPDATE is true iff the list contains explicit
14739 update of base register. */
14740 void
14741 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14742 bool update)
14744 int i;
14745 char pattern[100];
14746 int offset;
14747 const char *conditional;
14748 int num_saves = XVECLEN (operands[0], 0);
14749 unsigned int regno;
14750 unsigned int regno_base = REGNO (operands[1]);
14752 offset = 0;
14753 offset += update ? 1 : 0;
14754 offset += return_pc ? 1 : 0;
14756 /* Is the base register in the list? */
14757 for (i = offset; i < num_saves; i++)
14759 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14760 /* If SP is in the list, then the base register must be SP. */
14761 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14762 /* If base register is in the list, there must be no explicit update. */
14763 if (regno == regno_base)
14764 gcc_assert (!update);
14767 conditional = reverse ? "%?%D0" : "%?%d0";
14768 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14770 /* Output pop (not ldmfd) because it has a shorter encoding. */
14771 gcc_assert (update);
14772 sprintf (pattern, "pop%s\t{", conditional);
14774 else
14776 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14777 It's just a convention; their semantics are identical. */
14778 if (regno_base == SP_REGNUM)
14779 sprintf (pattern, "ldm%sfd\t", conditional);
14780 else if (TARGET_UNIFIED_ASM)
14781 sprintf (pattern, "ldmia%s\t", conditional);
14782 else
14783 sprintf (pattern, "ldm%sia\t", conditional);
14785 strcat (pattern, reg_names[regno_base]);
14786 if (update)
14787 strcat (pattern, "!, {");
14788 else
14789 strcat (pattern, ", {");
14792 /* Output the first destination register. */
14793 strcat (pattern,
14794 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14796 /* Output the rest of the destination registers. */
14797 for (i = offset + 1; i < num_saves; i++)
14799 strcat (pattern, ", ");
14800 strcat (pattern,
14801 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14804 strcat (pattern, "}");
14806 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14807 strcat (pattern, "^");
14809 output_asm_insn (pattern, &cond);
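/* Example of the assembly produced above (hypothetical operands): popping
   r4, r5 and the return address with SP as the base register on a
   unified-syntax target emits
       pop	{r4, r5, pc}
   whereas the same register list on a non-SP base falls back to an
   ldmia/ldmfd form.  */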
14813 /* Output the assembly for a store multiple. */
14815 const char *
14816 vfp_output_fstmd (rtx * operands)
14818 char pattern[100];
14819 int p;
14820 int base;
14821 int i;
14823 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14824 p = strlen (pattern);
14826 gcc_assert (REG_P (operands[1]));
14828 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14829 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14831 p += sprintf (&pattern[p], ", d%d", base + i);
14833 strcpy (&pattern[p], "}");
14835 output_asm_insn (pattern, operands);
14836 return "";
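/* Example of the generated template (hypothetical operands): pushing three
   D registers starting at d8 produces
       fstmfdd	sp!, {d8, d9, d10}
   i.e. the stack pointer is pre-decremented by 3 * 8 = 24 bytes.  */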
14840 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
14841 number of bytes pushed. */
14843 static int
14844 vfp_emit_fstmd (int base_reg, int count)
14846 rtx par;
14847 rtx dwarf;
14848 rtx tmp, reg;
14849 int i;
14851 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
14852 register pairs are stored by a store multiple insn. We avoid this
14853 by pushing an extra pair. */
14854 if (count == 2 && !arm_arch6)
14856 if (base_reg == LAST_VFP_REGNUM - 3)
14857 base_reg -= 2;
14858 count++;
14861 /* FSTMD may not store more than 16 doubleword registers at once. Split
14862 larger stores into multiple parts (up to a maximum of two, in
14863 practice). */
14864 if (count > 16)
14866 int saved;
14867 /* NOTE: base_reg is an internal register number, so each D register
14868 counts as 2. */
14869 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14870 saved += vfp_emit_fstmd (base_reg, 16);
14871 return saved;
14874 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14875 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14877 reg = gen_rtx_REG (DFmode, base_reg);
14878 base_reg += 2;
14880 XVECEXP (par, 0, 0)
14881 = gen_rtx_SET (VOIDmode,
14882 gen_frame_mem
14883 (BLKmode,
14884 gen_rtx_PRE_MODIFY (Pmode,
14885 stack_pointer_rtx,
14886 plus_constant
14887 (Pmode, stack_pointer_rtx,
14888 - (count * 8)))
14890 gen_rtx_UNSPEC (BLKmode,
14891 gen_rtvec (1, reg),
14892 UNSPEC_PUSH_MULT));
14894 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14895 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14896 RTX_FRAME_RELATED_P (tmp) = 1;
14897 XVECEXP (dwarf, 0, 0) = tmp;
14899 tmp = gen_rtx_SET (VOIDmode,
14900 gen_frame_mem (DFmode, stack_pointer_rtx),
14901 reg);
14902 RTX_FRAME_RELATED_P (tmp) = 1;
14903 XVECEXP (dwarf, 0, 1) = tmp;
14905 for (i = 1; i < count; i++)
14907 reg = gen_rtx_REG (DFmode, base_reg);
14908 base_reg += 2;
14909 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14911 tmp = gen_rtx_SET (VOIDmode,
14912 gen_frame_mem (DFmode,
14913 plus_constant (Pmode,
14914 stack_pointer_rtx,
14915 i * 8)),
14916 reg);
14917 RTX_FRAME_RELATED_P (tmp) = 1;
14918 XVECEXP (dwarf, 0, i + 1) = tmp;
14921 par = emit_insn (par);
14922 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14923 RTX_FRAME_RELATED_P (par) = 1;
14925 return count * 8;
14928 /* Emit a call instruction with pattern PAT. ADDR is the address of
14929 the call target. */
14931 void
14932 arm_emit_call_insn (rtx pat, rtx addr)
14934 rtx insn;
14936 insn = emit_call_insn (pat);
14938 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14939 If the call might use such an entry, add a use of the PIC register
14940 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14941 if (TARGET_VXWORKS_RTP
14942 && flag_pic
14943 && GET_CODE (addr) == SYMBOL_REF
14944 && (SYMBOL_REF_DECL (addr)
14945 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14946 : !SYMBOL_REF_LOCAL_P (addr)))
14948 require_pic_register ();
14949 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14953 /* Output a 'call' insn. */
14954 const char *
14955 output_call (rtx *operands)
14957 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14959 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14960 if (REGNO (operands[0]) == LR_REGNUM)
14962 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14963 output_asm_insn ("mov%?\t%0, %|lr", operands);
14966 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14968 if (TARGET_INTERWORK || arm_arch4t)
14969 output_asm_insn ("bx%?\t%0", operands);
14970 else
14971 output_asm_insn ("mov%?\t%|pc, %0", operands);
14973 return "";
14976 /* Output a 'call' insn that is a reference in memory. This is
14977 disabled for ARMv5, where we prefer a blx instead, because otherwise
14978 there's a significant performance overhead.
14979 const char *
14980 output_call_mem (rtx *operands)
14982 gcc_assert (!arm_arch5);
14983 if (TARGET_INTERWORK)
14985 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14986 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14987 output_asm_insn ("bx%?\t%|ip", operands);
14989 else if (regno_use_in (LR_REGNUM, operands[0]))
14991 /* LR is used in the memory address. We load the address in the
14992 first instruction. It's safe to use IP as the target of the
14993 load since the call will kill it anyway. */
14994 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14995 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14996 if (arm_arch4t)
14997 output_asm_insn ("bx%?\t%|ip", operands);
14998 else
14999 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
15001 else
15003 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
15004 output_asm_insn ("ldr%?\t%|pc, %0", operands);
15007 return "";
15011 /* Output a move from arm registers to arm registers of a long double
15012 OPERANDS[0] is the destination.
15013 OPERANDS[1] is the source. */
15014 const char *
15015 output_mov_long_double_arm_from_arm (rtx *operands)
15017 /* We have to be careful here because the two might overlap. */
15018 int dest_start = REGNO (operands[0]);
15019 int src_start = REGNO (operands[1]);
15020 rtx ops[2];
15021 int i;
15023 if (dest_start < src_start)
15025 for (i = 0; i < 3; i++)
15027 ops[0] = gen_rtx_REG (SImode, dest_start + i);
15028 ops[1] = gen_rtx_REG (SImode, src_start + i);
15029 output_asm_insn ("mov%?\t%0, %1", ops);
15032 else
15034 for (i = 2; i >= 0; i--)
15036 ops[0] = gen_rtx_REG (SImode, dest_start + i);
15037 ops[1] = gen_rtx_REG (SImode, src_start + i);
15038 output_asm_insn ("mov%?\t%0, %1", ops);
15042 return "";
15045 void
15046 arm_emit_movpair (rtx dest, rtx src)
15048 /* If the src is an immediate, simplify it. */
15049 if (CONST_INT_P (src))
15051 HOST_WIDE_INT val = INTVAL (src);
15052 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
15053 if ((val >> 16) & 0x0000ffff)
15054 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
15055 GEN_INT (16)),
15056 GEN_INT ((val >> 16) & 0x0000ffff));
15057 return;
15059 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
15060 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
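/* A minimal sketch of the expansion above (assuming a target with
   movw/movt, e.g. ARMv7): for an immediate such as 0x12345678 this emits
       movw	rd, #0x5678
       movt	rd, #0x1234
   with the movt omitted when the upper half is zero, while a symbolic SRC
   is split into the equivalent movw/movt pair using #:lower16: and
   #:upper16: relocations.  */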
15063 /* Output a move between double words. It must be REG<-MEM
15064 or MEM<-REG. */
15065 const char *
15066 output_move_double (rtx *operands, bool emit, int *count)
15068 enum rtx_code code0 = GET_CODE (operands[0]);
15069 enum rtx_code code1 = GET_CODE (operands[1]);
15070 rtx otherops[3];
15071 if (count)
15072 *count = 1;
15074 /* The only case when this might happen is when
15075 you are looking at the length of a DImode instruction
15076 that has an invalid constant in it. */
15077 if (code0 == REG && code1 != MEM)
15079 gcc_assert (!emit);
15080 *count = 2;
15081 return "";
15084 if (code0 == REG)
15086 unsigned int reg0 = REGNO (operands[0]);
15088 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
15090 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
15092 switch (GET_CODE (XEXP (operands[1], 0)))
15094 case REG:
15096 if (emit)
15098 if (TARGET_LDRD
15099 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
15100 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
15101 else
15102 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15104 break;
15106 case PRE_INC:
15107 gcc_assert (TARGET_LDRD);
15108 if (emit)
15109 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
15110 break;
15112 case PRE_DEC:
15113 if (emit)
15115 if (TARGET_LDRD)
15116 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
15117 else
15118 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
15120 break;
15122 case POST_INC:
15123 if (emit)
15125 if (TARGET_LDRD)
15126 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
15127 else
15128 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
15130 break;
15132 case POST_DEC:
15133 gcc_assert (TARGET_LDRD);
15134 if (emit)
15135 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
15136 break;
15138 case PRE_MODIFY:
15139 case POST_MODIFY:
15140 /* Autoincrement addressing modes should never have overlapping
15141 base and destination registers, and overlapping index registers
15142 are already prohibited, so this doesn't need to worry about
15143 fix_cm3_ldrd. */
15144 otherops[0] = operands[0];
15145 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
15146 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
15148 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
15150 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
15152 /* Registers overlap so split out the increment. */
15153 if (emit)
15155 output_asm_insn ("add%?\t%1, %1, %2", otherops);
15156 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
15158 if (count)
15159 *count = 2;
15161 else
15163 /* Use a single insn if we can.
15164 FIXME: IWMMXT allows offsets larger than ldrd can
15165 handle, fix these up with a pair of ldr. */
15166 if (TARGET_THUMB2
15167 || !CONST_INT_P (otherops[2])
15168 || (INTVAL (otherops[2]) > -256
15169 && INTVAL (otherops[2]) < 256))
15171 if (emit)
15172 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
15174 else
15176 if (emit)
15178 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
15179 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15181 if (count)
15182 *count = 2;
15187 else
15189 /* Use a single insn if we can.
15190 FIXME: IWMMXT allows offsets larger than ldrd can handle,
15191 fix these up with a pair of ldr. */
15192 if (TARGET_THUMB2
15193 || !CONST_INT_P (otherops[2])
15194 || (INTVAL (otherops[2]) > -256
15195 && INTVAL (otherops[2]) < 256))
15197 if (emit)
15198 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
15200 else
15202 if (emit)
15204 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15205 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
15207 if (count)
15208 *count = 2;
15211 break;
15213 case LABEL_REF:
15214 case CONST:
15215 /* We might be able to use ldrd %0, %1 here. However the range is
15216 different to ldr/adr, and it is broken on some ARMv7-M
15217 implementations. */
15218 /* Use the second register of the pair to avoid problematic
15219 overlap. */
15220 otherops[1] = operands[1];
15221 if (emit)
15222 output_asm_insn ("adr%?\t%0, %1", otherops);
15223 operands[1] = otherops[0];
15224 if (emit)
15226 if (TARGET_LDRD)
15227 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15228 else
15229 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
15232 if (count)
15233 *count = 2;
15234 break;
15236 /* ??? This needs checking for thumb2. */
15237 default:
15238 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
15239 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
15241 otherops[0] = operands[0];
15242 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
15243 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
15245 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
15247 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15249 switch ((int) INTVAL (otherops[2]))
15251 case -8:
15252 if (emit)
15253 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
15254 return "";
15255 case -4:
15256 if (TARGET_THUMB2)
15257 break;
15258 if (emit)
15259 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
15260 return "";
15261 case 4:
15262 if (TARGET_THUMB2)
15263 break;
15264 if (emit)
15265 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
15266 return "";
15269 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
15270 operands[1] = otherops[0];
15271 if (TARGET_LDRD
15272 && (REG_P (otherops[2])
15273 || TARGET_THUMB2
15274 || (CONST_INT_P (otherops[2])
15275 && INTVAL (otherops[2]) > -256
15276 && INTVAL (otherops[2]) < 256)))
15278 if (reg_overlap_mentioned_p (operands[0],
15279 otherops[2]))
15281 rtx tmp;
15282 /* Swap base and index registers over to
15283 avoid a conflict. */
15284 tmp = otherops[1];
15285 otherops[1] = otherops[2];
15286 otherops[2] = tmp;
15288 /* If both registers conflict, it will usually
15289 have been fixed by a splitter. */
15290 if (reg_overlap_mentioned_p (operands[0], otherops[2])
15291 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
15293 if (emit)
15295 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15296 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15298 if (count)
15299 *count = 2;
15301 else
15303 otherops[0] = operands[0];
15304 if (emit)
15305 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
15307 return "";
15310 if (CONST_INT_P (otherops[2]))
15312 if (emit)
15314 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
15315 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
15316 else
15317 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15320 else
15322 if (emit)
15323 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15326 else
15328 if (emit)
15329 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
15332 if (count)
15333 *count = 2;
15335 if (TARGET_LDRD)
15336 return "ldr%(d%)\t%0, [%1]";
15338 return "ldm%(ia%)\t%1, %M0";
15340 else
15342 otherops[1] = adjust_address (operands[1], SImode, 4);
15343 /* Take care of overlapping base/data reg. */
15344 if (reg_mentioned_p (operands[0], operands[1]))
15346 if (emit)
15348 output_asm_insn ("ldr%?\t%0, %1", otherops);
15349 output_asm_insn ("ldr%?\t%0, %1", operands);
15351 if (count)
15352 *count = 2;
15355 else
15357 if (emit)
15359 output_asm_insn ("ldr%?\t%0, %1", operands);
15360 output_asm_insn ("ldr%?\t%0, %1", otherops);
15362 if (count)
15363 *count = 2;
15368 else
15370 /* Constraints should ensure this. */
15371 gcc_assert (code0 == MEM && code1 == REG);
15372 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
15373 || (TARGET_ARM && TARGET_LDRD));
15375 switch (GET_CODE (XEXP (operands[0], 0)))
15377 case REG:
15378 if (emit)
15380 if (TARGET_LDRD)
15381 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
15382 else
15383 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15385 break;
15387 case PRE_INC:
15388 gcc_assert (TARGET_LDRD);
15389 if (emit)
15390 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
15391 break;
15393 case PRE_DEC:
15394 if (emit)
15396 if (TARGET_LDRD)
15397 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
15398 else
15399 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
15401 break;
15403 case POST_INC:
15404 if (emit)
15406 if (TARGET_LDRD)
15407 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
15408 else
15409 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
15411 break;
15413 case POST_DEC:
15414 gcc_assert (TARGET_LDRD);
15415 if (emit)
15416 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
15417 break;
15419 case PRE_MODIFY:
15420 case POST_MODIFY:
15421 otherops[0] = operands[1];
15422 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
15423 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
15425 /* IWMMXT allows offsets larger than ldrd can handle;
15426 fix these up with a pair of ldr. */
15427 if (!TARGET_THUMB2
15428 && CONST_INT_P (otherops[2])
15429 && (INTVAL(otherops[2]) <= -256
15430 || INTVAL(otherops[2]) >= 256))
15432 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15434 if (emit)
15436 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
15437 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15439 if (count)
15440 *count = 2;
15442 else
15444 if (emit)
15446 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15447 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
15449 if (count)
15450 *count = 2;
15453 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15455 if (emit)
15456 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
15458 else
15460 if (emit)
15461 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
15463 break;
15465 case PLUS:
15466 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
15467 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15469 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
15471 case -8:
15472 if (emit)
15473 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
15474 return "";
15476 case -4:
15477 if (TARGET_THUMB2)
15478 break;
15479 if (emit)
15480 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
15481 return "";
15483 case 4:
15484 if (TARGET_THUMB2)
15485 break;
15486 if (emit)
15487 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
15488 return "";
15491 if (TARGET_LDRD
15492 && (REG_P (otherops[2])
15493 || TARGET_THUMB2
15494 || (CONST_INT_P (otherops[2])
15495 && INTVAL (otherops[2]) > -256
15496 && INTVAL (otherops[2]) < 256)))
15498 otherops[0] = operands[1];
15499 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
15500 if (emit)
15501 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
15502 return "";
15504 /* Fall through */
15506 default:
15507 otherops[0] = adjust_address (operands[0], SImode, 4);
15508 otherops[1] = operands[1];
15509 if (emit)
15511 output_asm_insn ("str%?\t%1, %0", operands);
15512 output_asm_insn ("str%?\t%H1, %0", otherops);
15514 if (count)
15515 *count = 2;
15519 return "";
15522 /* Output a move, load or store for quad-word vectors in ARM registers. Only
15523 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
15525 const char *
15526 output_move_quad (rtx *operands)
15528 if (REG_P (operands[0]))
15530 /* Load, or reg->reg move. */
15532 if (MEM_P (operands[1]))
15534 switch (GET_CODE (XEXP (operands[1], 0)))
15536 case REG:
15537 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15538 break;
15540 case LABEL_REF:
15541 case CONST:
15542 output_asm_insn ("adr%?\t%0, %1", operands);
15543 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
15544 break;
15546 default:
15547 gcc_unreachable ();
15550 else
15552 rtx ops[2];
15553 int dest, src, i;
15555 gcc_assert (REG_P (operands[1]));
15557 dest = REGNO (operands[0]);
15558 src = REGNO (operands[1]);
15560 /* This seems pretty dumb, but hopefully GCC won't try to do it
15561 very often. */
15562 if (dest < src)
15563 for (i = 0; i < 4; i++)
15565 ops[0] = gen_rtx_REG (SImode, dest + i);
15566 ops[1] = gen_rtx_REG (SImode, src + i);
15567 output_asm_insn ("mov%?\t%0, %1", ops);
15569 else
15570 for (i = 3; i >= 0; i--)
15572 ops[0] = gen_rtx_REG (SImode, dest + i);
15573 ops[1] = gen_rtx_REG (SImode, src + i);
15574 output_asm_insn ("mov%?\t%0, %1", ops);
15578 else
15580 gcc_assert (MEM_P (operands[0]));
15581 gcc_assert (REG_P (operands[1]));
15582 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15584 switch (GET_CODE (XEXP (operands[0], 0)))
15586 case REG:
15587 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15588 break;
15590 default:
15591 gcc_unreachable ();
15595 return "";
15598 /* Output a VFP load or store instruction. */
15600 const char *
15601 output_move_vfp (rtx *operands)
15603 rtx reg, mem, addr, ops[2];
15604 int load = REG_P (operands[0]);
15605 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15606 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15607 const char *templ;
15608 char buff[50];
15609 enum machine_mode mode;
15611 reg = operands[!load];
15612 mem = operands[load];
15614 mode = GET_MODE (reg);
15616 gcc_assert (REG_P (reg));
15617 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15618 gcc_assert (mode == SFmode
15619 || mode == DFmode
15620 || mode == SImode
15621 || mode == DImode
15622 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15623 gcc_assert (MEM_P (mem));
15625 addr = XEXP (mem, 0);
15627 switch (GET_CODE (addr))
15629 case PRE_DEC:
15630 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15631 ops[0] = XEXP (addr, 0);
15632 ops[1] = reg;
15633 break;
15635 case POST_INC:
15636 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15637 ops[0] = XEXP (addr, 0);
15638 ops[1] = reg;
15639 break;
15641 default:
15642 templ = "f%s%c%%?\t%%%s0, %%1%s";
15643 ops[0] = reg;
15644 ops[1] = mem;
15645 break;
15648 sprintf (buff, templ,
15649 load ? "ld" : "st",
15650 dp ? 'd' : 's',
15651 dp ? "P" : "",
15652 integer_p ? "\t%@ int" : "");
15653 output_asm_insn (buff, ops);
15655 return "";
15658 /* Output a Neon double-word or quad-word load or store, or a load
15659 or store for larger structure modes.
15661 WARNING: The ordering of elements is weird in big-endian mode,
15662 because the EABI requires that vectors stored in memory appear
15663 as though they were stored by a VSTM instruction.
15664 GCC RTL defines element ordering based on in-memory order.
15665 This can be different from the architectural ordering of elements
15666 within a NEON register. The intrinsics defined in arm_neon.h use the
15667 NEON register element ordering, not the GCC RTL element ordering.
15669 For example, the in-memory ordering of a big-endian quadword
15670 vector with 16-bit elements when stored from register pair {d0,d1}
15671 will be (lowest address first, d0[N] is NEON register element N):
15673 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15675 When necessary, quadword registers (dN, dN+1) are moved to ARM
15676 registers from rN in the order:
15678 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15680 So that STM/LDM can be used on vectors in ARM registers, and the
15681 same memory layout will result as if VSTM/VLDM were used.
15683 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15684 possible, which allows use of appropriate alignment tags.
15685 Note that the choice of "64" is independent of the actual vector
15686 element size; this size simply ensures that the behavior is
15687 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15689 Due to limitations of those instructions, use of VST1.64/VLD1.64
15690 is not possible if:
15691 - the address contains PRE_DEC, or
15692 - the mode refers to more than 4 double-word registers
15694 In those cases, it would be possible to replace VSTM/VLDM by a
15695 sequence of instructions; this is not currently implemented since
15696 this is not certain to actually improve performance. */
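/* As an illustration (register numbers chosen arbitrarily): a quad-word
   (two D-register) load whose address is a plain register comes out as
   something like "vld1.64 {d16-d17}, [r0]", while an XImode move (eight
   D-registers) exceeds the vld1/vst1 limit and falls back to vldmia/vstmia. */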
15698 const char *
15699 output_move_neon (rtx *operands)
15701 rtx reg, mem, addr, ops[2];
15702 int regno, nregs, load = REG_P (operands[0]);
15703 const char *templ;
15704 char buff[50];
15705 enum machine_mode mode;
15707 reg = operands[!load];
15708 mem = operands[load];
15710 mode = GET_MODE (reg);
15712 gcc_assert (REG_P (reg));
15713 regno = REGNO (reg);
15714 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15715 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15716 || NEON_REGNO_OK_FOR_QUAD (regno));
15717 gcc_assert (VALID_NEON_DREG_MODE (mode)
15718 || VALID_NEON_QREG_MODE (mode)
15719 || VALID_NEON_STRUCT_MODE (mode));
15720 gcc_assert (MEM_P (mem));
15722 addr = XEXP (mem, 0);
15724 /* Strip off const from addresses like (const (plus (...))). */
15725 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15726 addr = XEXP (addr, 0);
15728 switch (GET_CODE (addr))
15730 case POST_INC:
15731 /* We have to use vldm / vstm for too-large modes. */
15732 if (nregs > 4)
15734 templ = "v%smia%%?\t%%0!, %%h1";
15735 ops[0] = XEXP (addr, 0);
15737 else
15739 templ = "v%s1.64\t%%h1, %%A0";
15740 ops[0] = mem;
15742 ops[1] = reg;
15743 break;
15745 case PRE_DEC:
15746 /* We have to use vldm / vstm in this case, since there is no
15747 pre-decrement form of the vld1 / vst1 instructions. */
15748 templ = "v%smdb%%?\t%%0!, %%h1";
15749 ops[0] = XEXP (addr, 0);
15750 ops[1] = reg;
15751 break;
15753 case POST_MODIFY:
15754 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
15755 gcc_unreachable ();
15757 case LABEL_REF:
15758 case PLUS:
15760 int i;
15761 int overlap = -1;
15762 for (i = 0; i < nregs; i++)
15764 /* We're only using DImode here because it's a convenient size. */
15765 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15766 ops[1] = adjust_address (mem, DImode, 8 * i);
15767 if (reg_overlap_mentioned_p (ops[0], mem))
15769 gcc_assert (overlap == -1);
15770 overlap = i;
15772 else
15774 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15775 output_asm_insn (buff, ops);
15778 if (overlap != -1)
15780 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15781 ops[1] = adjust_address (mem, SImode, 8 * overlap);
15782 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15783 output_asm_insn (buff, ops);
15786 return "";
15789 default:
15790 /* We have to use vldm / vstm for too-large modes. */
15791 if (nregs > 4)
15792 templ = "v%smia%%?\t%%m0, %%h1";
15793 else
15794 templ = "v%s1.64\t%%h1, %%A0";
15796 ops[0] = mem;
15797 ops[1] = reg;
15800 sprintf (buff, templ, load ? "ld" : "st");
15801 output_asm_insn (buff, ops);
15803 return "";
15806 /* Compute and return the length of neon_mov<mode>, where <mode> is
15807 one of VSTRUCT modes: EI, OI, CI or XI. */
15808 int
15809 arm_attr_length_move_neon (rtx insn)
15811 rtx reg, mem, addr;
15812 int load;
15813 enum machine_mode mode;
15815 extract_insn_cached (insn);
15817 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15819 mode = GET_MODE (recog_data.operand[0]);
15820 switch (mode)
15822 case EImode:
15823 case OImode:
15824 return 8;
15825 case CImode:
15826 return 12;
15827 case XImode:
15828 return 16;
15829 default:
15830 gcc_unreachable ();
15834 load = REG_P (recog_data.operand[0]);
15835 reg = recog_data.operand[!load];
15836 mem = recog_data.operand[load];
15838 gcc_assert (MEM_P (mem));
15840 mode = GET_MODE (reg);
15841 addr = XEXP (mem, 0);
15843 /* Strip off const from addresses like (const (plus (...))). */
15844 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15845 addr = XEXP (addr, 0);
15847 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15849 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15850 return insns * 4;
15852 else
15853 return 4;
15856 /* Return nonzero if the offset in the address is an immediate. Otherwise,
15857 return zero. */
15859 int
15860 arm_address_offset_is_imm (rtx insn)
15862 rtx mem, addr;
15864 extract_insn_cached (insn);
15866 if (REG_P (recog_data.operand[0]))
15867 return 0;
15869 mem = recog_data.operand[0];
15871 gcc_assert (MEM_P (mem));
15873 addr = XEXP (mem, 0);
15875 if (REG_P (addr)
15876 || (GET_CODE (addr) == PLUS
15877 && REG_P (XEXP (addr, 0))
15878 && CONST_INT_P (XEXP (addr, 1))))
15879 return 1;
15880 else
15881 return 0;
15884 /* Output an ADD r, s, #n where n may be too big for one instruction.
15885 If N is zero and the source and destination are the same register, output nothing. */
15886 const char *
15887 output_add_immediate (rtx *operands)
15889 HOST_WIDE_INT n = INTVAL (operands[2]);
15891 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15893 if (n < 0)
15894 output_multi_immediate (operands,
15895 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15896 -n);
15897 else
15898 output_multi_immediate (operands,
15899 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15903 return "";
15906 /* Output a multiple immediate operation.
15907 OPERANDS is the vector of operands referred to in the output patterns.
15908 INSTR1 is the output pattern to use for the first constant.
15909 INSTR2 is the output pattern to use for subsequent constants.
15910 IMMED_OP is the index of the constant slot in OPERANDS.
15911 N is the constant value. */
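/* For example, with N = 0x00ff00ff the loop below makes two passes: the
   first emits INSTR1 with the slice 0xff and the second emits INSTR2 with
   the slice 0x00ff0000, since each pass consumes one 8-bit chunk of the
   constant that starts at an even bit position. */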
15912 static const char *
15913 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15914 int immed_op, HOST_WIDE_INT n)
15916 #if HOST_BITS_PER_WIDE_INT > 32
15917 n &= 0xffffffff;
15918 #endif
15920 if (n == 0)
15922 /* Quick and easy output. */
15923 operands[immed_op] = const0_rtx;
15924 output_asm_insn (instr1, operands);
15926 else
15928 int i;
15929 const char * instr = instr1;
15931 /* Note that n is never zero here (which would give no output). */
15932 for (i = 0; i < 32; i += 2)
15934 if (n & (3 << i))
15936 operands[immed_op] = GEN_INT (n & (255 << i));
15937 output_asm_insn (instr, operands);
15938 instr = instr2;
15939 i += 6;
15944 return "";
15947 /* Return the name of a shifter operation. */
15948 static const char *
15949 arm_shift_nmem(enum rtx_code code)
15951 switch (code)
15953 case ASHIFT:
15954 return ARM_LSL_NAME;
15956 case ASHIFTRT:
15957 return "asr";
15959 case LSHIFTRT:
15960 return "lsr";
15962 case ROTATERT:
15963 return "ror";
15965 default:
15966 abort();
15970 /* Return the appropriate ARM instruction for the operation code.
15971 The returned result should not be overwritten. OP is the rtx of the
15972 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15973 was shifted. */
15974 const char *
15975 arithmetic_instr (rtx op, int shift_first_arg)
15977 switch (GET_CODE (op))
15979 case PLUS:
15980 return "add";
15982 case MINUS:
15983 return shift_first_arg ? "rsb" : "sub";
15985 case IOR:
15986 return "orr";
15988 case XOR:
15989 return "eor";
15991 case AND:
15992 return "and";
15994 case ASHIFT:
15995 case ASHIFTRT:
15996 case LSHIFTRT:
15997 case ROTATERT:
15998 return arm_shift_nmem(GET_CODE(op));
16000 default:
16001 gcc_unreachable ();
16005 /* Ensure valid constant shifts and return the appropriate shift mnemonic
16006 for the operation code. The returned result should not be overwritten.
16007 OP is the rtx of the shift.
16008 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
16009 constant shift amount otherwise. */
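/* For instance, (ashiftrt X (const_int 3)) yields "asr" with *AMOUNTP set
   to 3, while (mult X (const_int 8)) yields ARM_LSL_NAME with *AMOUNTP set
   to 3, since a multiply by a power of two is printed as a left shift. */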
16010 static const char *
16011 shift_op (rtx op, HOST_WIDE_INT *amountp)
16013 const char * mnem;
16014 enum rtx_code code = GET_CODE (op);
16016 switch (code)
16018 case ROTATE:
16019 if (!CONST_INT_P (XEXP (op, 1)))
16021 output_operand_lossage ("invalid shift operand");
16022 return NULL;
16025 code = ROTATERT;
16026 *amountp = 32 - INTVAL (XEXP (op, 1));
16027 mnem = "ror";
16028 break;
16030 case ASHIFT:
16031 case ASHIFTRT:
16032 case LSHIFTRT:
16033 case ROTATERT:
16034 mnem = arm_shift_nmem(code);
16035 if (CONST_INT_P (XEXP (op, 1)))
16037 *amountp = INTVAL (XEXP (op, 1));
16039 else if (REG_P (XEXP (op, 1)))
16041 *amountp = -1;
16042 return mnem;
16044 else
16046 output_operand_lossage ("invalid shift operand");
16047 return NULL;
16049 break;
16051 case MULT:
16052 /* We never have to worry about the amount being other than a
16053 power of 2, since this case can never be reloaded from a reg. */
16054 if (!CONST_INT_P (XEXP (op, 1)))
16056 output_operand_lossage ("invalid shift operand");
16057 return NULL;
16060 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
16062 /* Amount must be a power of two. */
16063 if (*amountp & (*amountp - 1))
16065 output_operand_lossage ("invalid shift operand");
16066 return NULL;
16069 *amountp = int_log2 (*amountp);
16070 return ARM_LSL_NAME;
16072 default:
16073 output_operand_lossage ("invalid shift operand");
16074 return NULL;
16077 /* This is not 100% correct, but follows from the desire to merge
16078 multiplication by a power of 2 with the recognizer for a
16079 shift. >=32 is not a valid shift for "lsl", so we must try to
16080 output a shift that produces the correct arithmetical result.
16081 Using lsr #32 is identical except for the fact that the carry bit
16082 is not set correctly if we set the flags; but we never use the
16083 carry bit from such an operation, so we can ignore that. */
16084 if (code == ROTATERT)
16085 /* Rotate is just modulo 32. */
16086 *amountp &= 31;
16087 else if (*amountp != (*amountp & 31))
16089 if (code == ASHIFT)
16090 mnem = "lsr";
16091 *amountp = 32;
16094 /* Shifts of 0 are no-ops. */
16095 if (*amountp == 0)
16096 return NULL;
16098 return mnem;
16101 /* Obtain the shift count from POWER, which must be a power of two. */
16103 static HOST_WIDE_INT
16104 int_log2 (HOST_WIDE_INT power)
16106 HOST_WIDE_INT shift = 0;
16108 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
16110 gcc_assert (shift <= 31);
16111 shift++;
16114 return shift;
16117 /* Output a .ascii pseudo-op, keeping track of lengths. This is
16118 because /bin/as is horribly restrictive. The judgement about
16119 whether or not each character is 'printable' (and can be output as
16120 is) or not (and must be printed with an octal escape) must be made
16121 with reference to the *host* character set -- the situation is
16122 similar to that discussed in the comments above pp_c_char in
16123 c-pretty-print.c. */
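/* For example, the bytes 'a', '"' and 0x01 are emitted as .ascii "a\"\001",
   and a fresh .ascii directive is started whenever the current string
   reaches MAX_ASCII_LEN characters. */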
16125 #define MAX_ASCII_LEN 51
16127 void
16128 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
16130 int i;
16131 int len_so_far = 0;
16133 fputs ("\t.ascii\t\"", stream);
16135 for (i = 0; i < len; i++)
16137 int c = p[i];
16139 if (len_so_far >= MAX_ASCII_LEN)
16141 fputs ("\"\n\t.ascii\t\"", stream);
16142 len_so_far = 0;
16145 if (ISPRINT (c))
16147 if (c == '\\' || c == '\"')
16149 putc ('\\', stream);
16150 len_so_far++;
16152 putc (c, stream);
16153 len_so_far++;
16155 else
16157 fprintf (stream, "\\%03o", c);
16158 len_so_far += 4;
16162 fputs ("\"\n", stream);
16165 /* Compute the register save mask for registers 0 through 12
16166 inclusive. This code is used by arm_compute_save_reg_mask. */
16168 static unsigned long
16169 arm_compute_save_reg0_reg12_mask (void)
16171 unsigned long func_type = arm_current_func_type ();
16172 unsigned long save_reg_mask = 0;
16173 unsigned int reg;
16175 if (IS_INTERRUPT (func_type))
16177 unsigned int max_reg;
16178 /* Interrupt functions must not corrupt any registers,
16179 even call clobbered ones. If this is a leaf function
16180 we can just examine the registers used by the RTL, but
16181 otherwise we have to assume that whatever function is
16182 called might clobber anything, and so we have to save
16183 all the call-clobbered registers as well. */
16184 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
16185 /* FIQ handlers have registers r8 - r12 banked, so
16186 we only need to check r0 - r7. Normal ISRs only
16187 bank r14 and r15, so we must check up to r12.
16188 r13 is the stack pointer which is always preserved,
16189 so we do not need to consider it here. */
16190 max_reg = 7;
16191 else
16192 max_reg = 12;
16194 for (reg = 0; reg <= max_reg; reg++)
16195 if (df_regs_ever_live_p (reg)
16196 || (! crtl->is_leaf && call_used_regs[reg]))
16197 save_reg_mask |= (1 << reg);
16199 /* Also save the pic base register if necessary. */
16200 if (flag_pic
16201 && !TARGET_SINGLE_PIC_BASE
16202 && arm_pic_register != INVALID_REGNUM
16203 && crtl->uses_pic_offset_table)
16204 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16206 else if (IS_VOLATILE(func_type))
16208 /* For noreturn functions we historically omitted register saves
16209 altogether. However, this really messes up debugging. As a
16210 compromise, save just the frame pointers. Combined with the link
16211 register saved elsewhere this should be sufficient to get
16212 a backtrace. */
16213 if (frame_pointer_needed)
16214 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16215 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
16216 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16217 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
16218 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
16220 else
16222 /* In the normal case we only need to save those registers
16223 which are call saved and which are used by this function. */
16224 for (reg = 0; reg <= 11; reg++)
16225 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16226 save_reg_mask |= (1 << reg);
16228 /* Handle the frame pointer as a special case. */
16229 if (frame_pointer_needed)
16230 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16232 /* If we aren't loading the PIC register,
16233 don't stack it even though it may be live. */
16234 if (flag_pic
16235 && !TARGET_SINGLE_PIC_BASE
16236 && arm_pic_register != INVALID_REGNUM
16237 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
16238 || crtl->uses_pic_offset_table))
16239 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16241 /* The prologue will copy SP into R0, so save it. */
16242 if (IS_STACKALIGN (func_type))
16243 save_reg_mask |= 1;
16246 /* Save registers so the exception handler can modify them. */
16247 if (crtl->calls_eh_return)
16249 unsigned int i;
16251 for (i = 0; ; i++)
16253 reg = EH_RETURN_DATA_REGNO (i);
16254 if (reg == INVALID_REGNUM)
16255 break;
16256 save_reg_mask |= 1 << reg;
16260 return save_reg_mask;
16264 /* Compute the number of bytes used to store the static chain register on the
16265 stack, above the stack frame. We need to know this accurately to get the
16266 alignment of the rest of the stack frame correct. */
16268 static int arm_compute_static_chain_stack_bytes (void)
16270 unsigned long func_type = arm_current_func_type ();
16271 int static_chain_stack_bytes = 0;
16273 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
16274 IS_NESTED (func_type) &&
16275 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
16276 static_chain_stack_bytes = 4;
16278 return static_chain_stack_bytes;
16282 /* Compute a bit mask of which registers need to be
16283 saved on the stack for the current function.
16284 This is used by arm_get_frame_offsets, which may add extra registers. */
16286 static unsigned long
16287 arm_compute_save_reg_mask (void)
16289 unsigned int save_reg_mask = 0;
16290 unsigned long func_type = arm_current_func_type ();
16291 unsigned int reg;
16293 if (IS_NAKED (func_type))
16294 /* This should never really happen. */
16295 return 0;
16297 /* If we are creating a stack frame, then we must save the frame pointer,
16298 IP (which will hold the old stack pointer), LR and the PC. */
16299 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16300 save_reg_mask |=
16301 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
16302 | (1 << IP_REGNUM)
16303 | (1 << LR_REGNUM)
16304 | (1 << PC_REGNUM);
16306 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
16308 /* Decide if we need to save the link register.
16309 Interrupt routines have their own banked link register,
16310 so they never need to save it.
16311 Otherwise if we do not use the link register we do not need to save
16312 it. If we are pushing other registers onto the stack however, we
16313 can save an instruction in the epilogue by pushing the link register
16314 now and then popping it back into the PC. This incurs extra memory
16315 accesses though, so we only do it when optimizing for size, and only
16316 if we know that we will not need a fancy return sequence. */
16317 if (df_regs_ever_live_p (LR_REGNUM)
16318 || (save_reg_mask
16319 && optimize_size
16320 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
16321 && !crtl->calls_eh_return))
16322 save_reg_mask |= 1 << LR_REGNUM;
16324 if (cfun->machine->lr_save_eliminated)
16325 save_reg_mask &= ~ (1 << LR_REGNUM);
16327 if (TARGET_REALLY_IWMMXT
16328 && ((bit_count (save_reg_mask)
16329 + ARM_NUM_INTS (crtl->args.pretend_args_size +
16330 arm_compute_static_chain_stack_bytes())
16331 ) % 2) != 0)
16333 /* The total number of registers that are going to be pushed
16334 onto the stack is odd. We need to ensure that the stack
16335 is 64-bit aligned before we start to save iWMMXt registers,
16336 and also before we start to create locals. (A local variable
16337 might be a double or long long which we will load/store using
16338 an iWMMXt instruction). Therefore we need to push another
16339 ARM register, so that the stack will be 64-bit aligned. We
16340 try to avoid using the arg registers (r0 - r3) as they might be
16341 used to pass values in a tail call. */
16342 for (reg = 4; reg <= 12; reg++)
16343 if ((save_reg_mask & (1 << reg)) == 0)
16344 break;
16346 if (reg <= 12)
16347 save_reg_mask |= (1 << reg);
16348 else
16350 cfun->machine->sibcall_blocked = 1;
16351 save_reg_mask |= (1 << 3);
16355 /* We may need to push an additional register for use initializing the
16356 PIC base register. */
16357 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
16358 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
16360 reg = thumb_find_work_register (1 << 4);
16361 if (!call_used_regs[reg])
16362 save_reg_mask |= (1 << reg);
16365 return save_reg_mask;
16369 /* Compute a bit mask of which registers need to be
16370 saved on the stack for the current function. */
16371 static unsigned long
16372 thumb1_compute_save_reg_mask (void)
16374 unsigned long mask;
16375 unsigned reg;
16377 mask = 0;
16378 for (reg = 0; reg < 12; reg ++)
16379 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16380 mask |= 1 << reg;
16382 if (flag_pic
16383 && !TARGET_SINGLE_PIC_BASE
16384 && arm_pic_register != INVALID_REGNUM
16385 && crtl->uses_pic_offset_table)
16386 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16388 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
16389 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16390 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16392 /* LR will also be pushed if any lo regs are pushed. */
16393 if (mask & 0xff || thumb_force_lr_save ())
16394 mask |= (1 << LR_REGNUM);
16396 /* Make sure we have a low work register if we need one.
16397 We will need one if we are going to push a high register,
16398 but we are not currently intending to push a low register. */
16399 if ((mask & 0xff) == 0
16400 && ((mask & 0x0f00) || TARGET_BACKTRACE))
16402 /* Use thumb_find_work_register to choose which register
16403 we will use. If the register is live then we will
16404 have to push it. Use LAST_LO_REGNUM as our fallback
16405 choice for the register to select. */
16406 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
16407 /* Make sure the register returned by thumb_find_work_register is
16408 not part of the return value. */
16409 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
16410 reg = LAST_LO_REGNUM;
16412 if (! call_used_regs[reg])
16413 mask |= 1 << reg;
16416 /* The 504 below is 8 bytes less than 512 because there are two possible
16417 alignment words. We can't tell here if they will be present or not so we
16418 have to play it safe and assume that they are. */
16419 if ((CALLER_INTERWORKING_SLOT_SIZE +
16420 ROUND_UP_WORD (get_frame_size ()) +
16421 crtl->outgoing_args_size) >= 504)
16423 /* This is the same as the code in thumb1_expand_prologue() which
16424 determines which register to use for stack decrement. */
16425 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
16426 if (mask & (1 << reg))
16427 break;
16429 if (reg > LAST_LO_REGNUM)
16431 /* Make sure we have a register available for stack decrement. */
16432 mask |= 1 << LAST_LO_REGNUM;
16436 return mask;
16440 /* Return the number of bytes required to save VFP registers. */
16441 static int
16442 arm_get_vfp_saved_size (void)
16444 unsigned int regno;
16445 int count;
16446 int saved;
16448 saved = 0;
16449 /* Space for saved VFP registers. */
16450 if (TARGET_HARD_FLOAT && TARGET_VFP)
16452 count = 0;
16453 for (regno = FIRST_VFP_REGNUM;
16454 regno < LAST_VFP_REGNUM;
16455 regno += 2)
16457 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
16458 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
16460 if (count > 0)
16462 /* Workaround ARM10 VFPr1 bug. */
16463 if (count == 2 && !arm_arch6)
16464 count++;
16465 saved += count * 8;
16467 count = 0;
16469 else
16470 count++;
16472 if (count > 0)
16474 if (count == 2 && !arm_arch6)
16475 count++;
16476 saved += count * 8;
16479 return saved;
16483 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
16484 everything bar the final return instruction. If SIMPLE_RETURN is true,
16485 then do not output the epilogue, because it has already been emitted in RTL. */
16486 const char *
16487 output_return_instruction (rtx operand, bool really_return, bool reverse,
16488 bool simple_return)
16490 char conditional[10];
16491 char instr[100];
16492 unsigned reg;
16493 unsigned long live_regs_mask;
16494 unsigned long func_type;
16495 arm_stack_offsets *offsets;
16497 func_type = arm_current_func_type ();
16499 if (IS_NAKED (func_type))
16500 return "";
16502 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
16504 /* If this function was declared non-returning, and we have
16505 found a tail call, then we have to trust that the called
16506 function won't return. */
16507 if (really_return)
16509 rtx ops[2];
16511 /* Otherwise, trap an attempted return by aborting. */
16512 ops[0] = operand;
16513 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
16514 : "abort");
16515 assemble_external_libcall (ops[1]);
16516 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
16519 return "";
16522 gcc_assert (!cfun->calls_alloca || really_return);
16524 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
16526 cfun->machine->return_used_this_function = 1;
16528 offsets = arm_get_frame_offsets ();
16529 live_regs_mask = offsets->saved_regs_mask;
16531 if (!simple_return && live_regs_mask)
16533 const char * return_reg;
16535 /* If we do not have any special requirements for function exit
16536 (e.g. interworking) then we can load the return address
16537 directly into the PC. Otherwise we must load it into LR. */
16538 if (really_return
16539 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
16540 return_reg = reg_names[PC_REGNUM];
16541 else
16542 return_reg = reg_names[LR_REGNUM];
16544 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
16546 /* There are three possible reasons for the IP register
16547 being saved: 1) a stack frame was created, in which case
16548 IP contains the old stack pointer, or 2) an ISR routine
16549 corrupted it, or 3) it was saved to align the stack on
16550 iWMMXt. In case 1, restore IP into SP, otherwise just
16551 restore IP. */
16552 if (frame_pointer_needed)
16554 live_regs_mask &= ~ (1 << IP_REGNUM);
16555 live_regs_mask |= (1 << SP_REGNUM);
16557 else
16558 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
16561 /* On some ARM architectures it is faster to use LDR rather than
16562 LDM to load a single register. On other architectures, the
16563 cost is the same. In 26 bit mode, or for exception handlers,
16564 we have to use LDM to load the PC so that the CPSR is also
16565 restored. */
16566 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
16567 if (live_regs_mask == (1U << reg))
16568 break;
16570 if (reg <= LAST_ARM_REGNUM
16571 && (reg != LR_REGNUM
16572 || ! really_return
16573 || ! IS_INTERRUPT (func_type)))
16575 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16576 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16578 else
16580 char *p;
16581 int first = 1;
16583 /* Generate the load multiple instruction to restore the
16584 registers. Note we can get here, even if
16585 frame_pointer_needed is true, but only if sp already
16586 points to the base of the saved core registers. */
16587 if (live_regs_mask & (1 << SP_REGNUM))
16589 unsigned HOST_WIDE_INT stack_adjust;
16591 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16592 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16594 if (stack_adjust && arm_arch5 && TARGET_ARM)
16595 if (TARGET_UNIFIED_ASM)
16596 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16597 else
16598 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16599 else
16601 /* If we can't use ldmib (SA110 bug),
16602 then try to pop r3 instead. */
16603 if (stack_adjust)
16604 live_regs_mask |= 1 << 3;
16606 if (TARGET_UNIFIED_ASM)
16607 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16608 else
16609 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16612 else
16613 if (TARGET_UNIFIED_ASM)
16614 sprintf (instr, "pop%s\t{", conditional);
16615 else
16616 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16618 p = instr + strlen (instr);
16620 for (reg = 0; reg <= SP_REGNUM; reg++)
16621 if (live_regs_mask & (1 << reg))
16623 int l = strlen (reg_names[reg]);
16625 if (first)
16626 first = 0;
16627 else
16629 memcpy (p, ", ", 2);
16630 p += 2;
16633 memcpy (p, "%|", 2);
16634 memcpy (p + 2, reg_names[reg], l);
16635 p += l + 2;
16638 if (live_regs_mask & (1 << LR_REGNUM))
16640 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16641 /* If returning from an interrupt, restore the CPSR. */
16642 if (IS_INTERRUPT (func_type))
16643 strcat (p, "^");
16645 else
16646 strcpy (p, "}");
16649 output_asm_insn (instr, & operand);
16651 /* See if we need to generate an extra instruction to
16652 perform the actual function return. */
16653 if (really_return
16654 && func_type != ARM_FT_INTERWORKED
16655 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16657 /* The return has already been handled
16658 by loading the LR into the PC. */
16659 return "";
16663 if (really_return)
16665 switch ((int) ARM_FUNC_TYPE (func_type))
16667 case ARM_FT_ISR:
16668 case ARM_FT_FIQ:
16669 /* ??? This is wrong for unified assembly syntax. */
16670 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16671 break;
16673 case ARM_FT_INTERWORKED:
16674 sprintf (instr, "bx%s\t%%|lr", conditional);
16675 break;
16677 case ARM_FT_EXCEPTION:
16678 /* ??? This is wrong for unified assembly syntax. */
16679 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16680 break;
16682 default:
16683 /* Use bx if it's available. */
16684 if (arm_arch5 || arm_arch4t)
16685 sprintf (instr, "bx%s\t%%|lr", conditional);
16686 else
16687 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16688 break;
16691 output_asm_insn (instr, & operand);
16694 return "";
16697 /* Write the function name into the code section, directly preceding
16698 the function prologue.
16700 Code will be output similar to this:
16702 .ascii "arm_poke_function_name", 0
16703 .align
16705 .word 0xff000000 + (t1 - t0)
16706 arm_poke_function_name
16707 mov ip, sp
16708 stmfd sp!, {fp, ip, lr, pc}
16709 sub fp, ip, #4
16711 When performing a stack backtrace, code can inspect the value
16712 of 'pc' stored at 'fp' + 0. If the trace function then looks
16713 at location pc - 12 and the top 8 bits are set, then we know
16714 that there is a function name embedded immediately preceding this
16715 location, whose length is given by (pc[-3] & ~0xff000000).
16717 We assume that pc is declared as a pointer to an unsigned long.
16719 It is of no benefit to output the function name if we are assembling
16720 a leaf function. These function types will not contain a stack
16721 backtrace structure, therefore it is not possible to determine the
16722 function name. */
16723 void
16724 arm_poke_function_name (FILE *stream, const char *name)
16726 unsigned long alignlength;
16727 unsigned long length;
16728 rtx x;
16730 length = strlen (name) + 1;
16731 alignlength = ROUND_UP_WORD (length);
16733 ASM_OUTPUT_ASCII (stream, name, length);
16734 ASM_OUTPUT_ALIGN (stream, 2);
16735 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16736 assemble_aligned_integer (UNITS_PER_WORD, x);
16739 /* Place some comments into the assembler stream
16740 describing the current function. */
16741 static void
16742 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16744 unsigned long func_type;
16746 /* ??? Do we want to print some of the below anyway? */
16747 if (TARGET_THUMB1)
16748 return;
16750 /* Sanity check. */
16751 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16753 func_type = arm_current_func_type ();
16755 switch ((int) ARM_FUNC_TYPE (func_type))
16757 default:
16758 case ARM_FT_NORMAL:
16759 break;
16760 case ARM_FT_INTERWORKED:
16761 asm_fprintf (f, "\t%@ Function supports interworking.\n");
16762 break;
16763 case ARM_FT_ISR:
16764 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16765 break;
16766 case ARM_FT_FIQ:
16767 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16768 break;
16769 case ARM_FT_EXCEPTION:
16770 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16771 break;
16774 if (IS_NAKED (func_type))
16775 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16777 if (IS_VOLATILE (func_type))
16778 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16780 if (IS_NESTED (func_type))
16781 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16782 if (IS_STACKALIGN (func_type))
16783 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16785 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16786 crtl->args.size,
16787 crtl->args.pretend_args_size, frame_size);
16789 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16790 frame_pointer_needed,
16791 cfun->machine->uses_anonymous_args);
16793 if (cfun->machine->lr_save_eliminated)
16794 asm_fprintf (f, "\t%@ link register save eliminated.\n");
16796 if (crtl->calls_eh_return)
16797 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
16801 static void
16802 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16803 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16805 arm_stack_offsets *offsets;
16807 if (TARGET_THUMB1)
16809 int regno;
16811 /* Emit any call-via-reg trampolines that are needed for v4t support
16812 of call_reg and call_value_reg type insns. */
16813 for (regno = 0; regno < LR_REGNUM; regno++)
16815 rtx label = cfun->machine->call_via[regno];
16817 if (label != NULL)
16819 switch_to_section (function_section (current_function_decl));
16820 targetm.asm_out.internal_label (asm_out_file, "L",
16821 CODE_LABEL_NUMBER (label));
16822 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16826 /* ??? Probably not safe to set this here, since it assumes that a
16827 function will be emitted as assembly immediately after we generate
16828 RTL for it. This does not happen for inline functions. */
16829 cfun->machine->return_used_this_function = 0;
16831 else /* TARGET_32BIT */
16833 /* We need to take into account any stack-frame rounding. */
16834 offsets = arm_get_frame_offsets ();
16836 gcc_assert (!use_return_insn (FALSE, NULL)
16837 || (cfun->machine->return_used_this_function != 0)
16838 || offsets->saved_regs == offsets->outgoing_args
16839 || frame_pointer_needed);
16841 /* Reset the ARM-specific per-function variables. */
16842 after_arm_reorg = 0;
16846 /* Generate and emit a sequence of insns equivalent to PUSH, but using
16847 STR and STRD. If an even number of registers is being pushed, an
16848 STRD pattern is created for each register pair. If an
16849 odd number of registers is pushed, emit an initial STR followed by
16850 as many STRD instructions as are needed. This works best when the
16851 stack is initially 64-bit aligned (the normal case), since it
16852 ensures that each STRD is also 64-bit aligned. */
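/* For example, pushing {r4, r5, r6} (an odd count) yields something like
   str r4, [sp, #-12]!
   strd r5, r6, [sp, #4]
   where the single STR both stores r4 and allocates the whole 12-byte block. */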
16853 static void
16854 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16856 int num_regs = 0;
16857 int i;
16858 int regno;
16859 rtx par = NULL_RTX;
16860 rtx dwarf = NULL_RTX;
16861 rtx tmp;
16862 bool first = true;
16864 num_regs = bit_count (saved_regs_mask);
16866 /* Must be at least one register to save, and can't save SP or PC. */
16867 gcc_assert (num_regs > 0 && num_regs <= 14);
16868 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16869 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16871 /* Create sequence for DWARF info. All the frame-related data for
16872 debugging is held in this wrapper. */
16873 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16875 /* Describe the stack adjustment. */
16876 tmp = gen_rtx_SET (VOIDmode,
16877 stack_pointer_rtx,
16878 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16879 RTX_FRAME_RELATED_P (tmp) = 1;
16880 XVECEXP (dwarf, 0, 0) = tmp;
16882 /* Find the first register. */
16883 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
16886 i = 0;
16888 /* If there's an odd number of registers to push, start off by
16889 pushing a single register. This ensures that subsequent strd
16890 operations are dword aligned (assuming that SP was originally
16891 64-bit aligned). */
16892 if ((num_regs & 1) != 0)
16894 rtx reg, mem, insn;
16896 reg = gen_rtx_REG (SImode, regno);
16897 if (num_regs == 1)
16898 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
16899 stack_pointer_rtx));
16900 else
16901 mem = gen_frame_mem (Pmode,
16902 gen_rtx_PRE_MODIFY
16903 (Pmode, stack_pointer_rtx,
16904 plus_constant (Pmode, stack_pointer_rtx,
16905 -4 * num_regs)));
16907 tmp = gen_rtx_SET (VOIDmode, mem, reg);
16908 RTX_FRAME_RELATED_P (tmp) = 1;
16909 insn = emit_insn (tmp);
16910 RTX_FRAME_RELATED_P (insn) = 1;
16911 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16912 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
16913 reg);
16914 RTX_FRAME_RELATED_P (tmp) = 1;
16915 i++;
16916 regno++;
16917 XVECEXP (dwarf, 0, i) = tmp;
16918 first = false;
16921 while (i < num_regs)
16922 if (saved_regs_mask & (1 << regno))
16924 rtx reg1, reg2, mem1, mem2;
16925 rtx tmp0, tmp1, tmp2;
16926 int regno2;
16928 /* Find the register to pair with this one. */
16929 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
16930 regno2++)
16933 reg1 = gen_rtx_REG (SImode, regno);
16934 reg2 = gen_rtx_REG (SImode, regno2);
16936 if (first)
16938 rtx insn;
16940 first = false;
16941 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
16942 stack_pointer_rtx,
16943 -4 * num_regs));
16944 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
16945 stack_pointer_rtx,
16946 -4 * (num_regs - 1)));
16947 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16948 plus_constant (Pmode, stack_pointer_rtx,
16949 -4 * (num_regs)));
16950 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
16951 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
16952 RTX_FRAME_RELATED_P (tmp0) = 1;
16953 RTX_FRAME_RELATED_P (tmp1) = 1;
16954 RTX_FRAME_RELATED_P (tmp2) = 1;
16955 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
16956 XVECEXP (par, 0, 0) = tmp0;
16957 XVECEXP (par, 0, 1) = tmp1;
16958 XVECEXP (par, 0, 2) = tmp2;
16959 insn = emit_insn (par);
16960 RTX_FRAME_RELATED_P (insn) = 1;
16961 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16963 else
16965 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
16966 stack_pointer_rtx,
16967 4 * i));
16968 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
16969 stack_pointer_rtx,
16970 4 * (i + 1)));
16971 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
16972 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
16973 RTX_FRAME_RELATED_P (tmp1) = 1;
16974 RTX_FRAME_RELATED_P (tmp2) = 1;
16975 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16976 XVECEXP (par, 0, 0) = tmp1;
16977 XVECEXP (par, 0, 1) = tmp2;
16978 emit_insn (par);
16981 /* Create unwind information. This is an approximation. */
16982 tmp1 = gen_rtx_SET (VOIDmode,
16983 gen_frame_mem (Pmode,
16984 plus_constant (Pmode,
16985 stack_pointer_rtx,
16986 4 * i)),
16987 reg1);
16988 tmp2 = gen_rtx_SET (VOIDmode,
16989 gen_frame_mem (Pmode,
16990 plus_constant (Pmode,
16991 stack_pointer_rtx,
16992 4 * (i + 1))),
16993 reg2);
16995 RTX_FRAME_RELATED_P (tmp1) = 1;
16996 RTX_FRAME_RELATED_P (tmp2) = 1;
16997 XVECEXP (dwarf, 0, i + 1) = tmp1;
16998 XVECEXP (dwarf, 0, i + 2) = tmp2;
16999 i += 2;
17000 regno = regno2 + 1;
17002 else
17003 regno++;
17005 return;
17008 /* STRD in ARM mode requires consecutive registers. This function emits STRD
17009 whenever possible, otherwise it emits single-word stores. The first store
17010 also allocates stack space for all saved registers, using writeback with
17011 pre-indexed addressing. All other stores use offset addressing. If no STRD
17012 can be emitted, this function emits a sequence of single-word stores,
17013 and not an STM as before, because single-word stores provide more
17014 scheduling freedom and can be turned into an STM by peephole optimizations. */
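/* For example, pushing {r4, r5, r7} yields something like
   strd r4, r5, [sp, #-12]!
   str r7, [sp, #8]
   where the consecutive pair r4/r5 uses STRD and r7 falls back to a plain STR. */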
17015 static void
17016 arm_emit_strd_push (unsigned long saved_regs_mask)
17018 int num_regs = 0;
17019 int i, j, dwarf_index = 0;
17020 int offset = 0;
17021 rtx dwarf = NULL_RTX;
17022 rtx insn = NULL_RTX;
17023 rtx tmp, mem;
17025 /* TODO: More efficient code could be emitted by changing the
17026 layout, e.g., first push all pairs that can use STRD to keep the
17027 stack aligned, and then push all other registers. */
17028 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17029 if (saved_regs_mask & (1 << i))
17030 num_regs++;
17032 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17033 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
17034 gcc_assert (num_regs > 0);
17036 /* Create sequence for DWARF info. */
17037 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
17039 /* For dwarf info, we generate explicit stack update. */
17040 tmp = gen_rtx_SET (VOIDmode,
17041 stack_pointer_rtx,
17042 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17043 RTX_FRAME_RELATED_P (tmp) = 1;
17044 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17046 /* Save registers. */
17047 offset = - 4 * num_regs;
17048 j = 0;
17049 while (j <= LAST_ARM_REGNUM)
17050 if (saved_regs_mask & (1 << j))
17052 if ((j % 2 == 0)
17053 && (saved_regs_mask & (1 << (j + 1))))
17055 /* The current register and the next register form a register pair
17056 for which STRD can be generated. */
17057 if (offset < 0)
17059 /* Allocate stack space for all saved registers. */
17060 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17061 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17062 mem = gen_frame_mem (DImode, tmp);
17063 offset = 0;
17065 else if (offset > 0)
17066 mem = gen_frame_mem (DImode,
17067 plus_constant (Pmode,
17068 stack_pointer_rtx,
17069 offset));
17070 else
17071 mem = gen_frame_mem (DImode, stack_pointer_rtx);
17073 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
17074 RTX_FRAME_RELATED_P (tmp) = 1;
17075 tmp = emit_insn (tmp);
17077 /* Record the first store insn. */
17078 if (dwarf_index == 1)
17079 insn = tmp;
17081 /* Generate dwarf info. */
17082 mem = gen_frame_mem (SImode,
17083 plus_constant (Pmode,
17084 stack_pointer_rtx,
17085 offset));
17086 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17087 RTX_FRAME_RELATED_P (tmp) = 1;
17088 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17090 mem = gen_frame_mem (SImode,
17091 plus_constant (Pmode,
17092 stack_pointer_rtx,
17093 offset + 4));
17094 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
17095 RTX_FRAME_RELATED_P (tmp) = 1;
17096 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17098 offset += 8;
17099 j += 2;
17101 else
17103 /* Emit a single word store. */
17104 if (offset < 0)
17106 /* Allocate stack space for all saved registers. */
17107 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17108 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17109 mem = gen_frame_mem (SImode, tmp);
17110 offset = 0;
17112 else if (offset > 0)
17113 mem = gen_frame_mem (SImode,
17114 plus_constant (Pmode,
17115 stack_pointer_rtx,
17116 offset));
17117 else
17118 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17120 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17121 RTX_FRAME_RELATED_P (tmp) = 1;
17122 tmp = emit_insn (tmp);
17124 /* Record the first store insn. */
17125 if (dwarf_index == 1)
17126 insn = tmp;
17128 /* Generate dwarf info. */
17129 mem = gen_frame_mem (SImode,
17130 plus_constant(Pmode,
17131 stack_pointer_rtx,
17132 offset));
17133 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17134 RTX_FRAME_RELATED_P (tmp) = 1;
17135 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17137 offset += 4;
17138 j += 1;
17141 else
17142 j++;
17144 /* Attach dwarf info to the first insn we generate. */
17145 gcc_assert (insn != NULL_RTX);
17146 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17147 RTX_FRAME_RELATED_P (insn) = 1;
17150 /* Generate and emit an insn that we will recognize as a push_multi.
17151 Unfortunately, since this insn does not reflect very well the actual
17152 semantics of the operation, we need to annotate the insn for the benefit
17153 of DWARF2 frame unwind information. */
17154 static rtx
17155 emit_multi_reg_push (unsigned long mask)
17157 int num_regs = 0;
17158 int num_dwarf_regs;
17159 int i, j;
17160 rtx par;
17161 rtx dwarf;
17162 int dwarf_par_index;
17163 rtx tmp, reg;
17165 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17166 if (mask & (1 << i))
17167 num_regs++;
17169 gcc_assert (num_regs && num_regs <= 16);
17171 /* We don't record the PC in the dwarf frame information. */
17172 num_dwarf_regs = num_regs;
17173 if (mask & (1 << PC_REGNUM))
17174 num_dwarf_regs--;
17176 /* For the body of the insn we are going to generate an UNSPEC in
17177 parallel with several USEs. This allows the insn to be recognized
17178 by the push_multi pattern in the arm.md file.
17180 The body of the insn looks something like this:
17182 (parallel [
17183 (set (mem:BLK (pre_modify:SI (reg:SI sp)
17184 (const_int:SI <num>)))
17185 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
17186 (use (reg:SI XX))
17187 (use (reg:SI YY))
17191 For the frame note however, we try to be more explicit and actually
17192 show each register being stored into the stack frame, plus a (single)
17193 decrement of the stack pointer. We do it this way in order to be
17194 friendly to the stack unwinding code, which only wants to see a single
17195 stack decrement per instruction. The RTL we generate for the note looks
17196 something like this:
17198 (sequence [
17199 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
17200 (set (mem:SI (reg:SI sp)) (reg:SI r4))
17201 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
17202 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
17206 FIXME: In an ideal world the PRE_MODIFY would not exist and
17207 instead we'd have a parallel expression detailing all
17208 the stores to the various memory addresses so that debug
17209 information is more up-to-date. Remember however while writing
17210 this to take care of the constraints with the push instruction.
17212 Note also that this has to be taken care of for the VFP registers.
17214 For more see PR43399. */
17216 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
17217 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
17218 dwarf_par_index = 1;
17220 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17222 if (mask & (1 << i))
17224 reg = gen_rtx_REG (SImode, i);
17226 XVECEXP (par, 0, 0)
17227 = gen_rtx_SET (VOIDmode,
17228 gen_frame_mem
17229 (BLKmode,
17230 gen_rtx_PRE_MODIFY (Pmode,
17231 stack_pointer_rtx,
17232 plus_constant
17233 (Pmode, stack_pointer_rtx,
17234 -4 * num_regs))
17236 gen_rtx_UNSPEC (BLKmode,
17237 gen_rtvec (1, reg),
17238 UNSPEC_PUSH_MULT));
17240 if (i != PC_REGNUM)
17242 tmp = gen_rtx_SET (VOIDmode,
17243 gen_frame_mem (SImode, stack_pointer_rtx),
17244 reg);
17245 RTX_FRAME_RELATED_P (tmp) = 1;
17246 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
17247 dwarf_par_index++;
17250 break;
17254 for (j = 1, i++; j < num_regs; i++)
17256 if (mask & (1 << i))
17258 reg = gen_rtx_REG (SImode, i);
17260 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
17262 if (i != PC_REGNUM)
17265 = gen_rtx_SET (VOIDmode,
17266 gen_frame_mem
17267 (SImode,
17268 plus_constant (Pmode, stack_pointer_rtx,
17269 4 * j)),
17270 reg);
17271 RTX_FRAME_RELATED_P (tmp) = 1;
17272 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
17275 j++;
17279 par = emit_insn (par);
17281 tmp = gen_rtx_SET (VOIDmode,
17282 stack_pointer_rtx,
17283 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17284 RTX_FRAME_RELATED_P (tmp) = 1;
17285 XVECEXP (dwarf, 0, 0) = tmp;
17287 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17289 return par;
17292 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
17293 SIZE is the offset to be adjusted.
17294 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
17295 static void
17296 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
17298 rtx dwarf;
17300 RTX_FRAME_RELATED_P (insn) = 1;
17301 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
17302 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
17305 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
17306 SAVED_REGS_MASK shows which registers need to be restored.
17308 Unfortunately, since this insn does not reflect very well the actual
17309 semantics of the operation, we need to annotate the insn for the benefit
17310 of DWARF2 frame unwind information. */
17311 static void
17312 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
17314 int num_regs = 0;
17315 int i, j;
17316 rtx par;
17317 rtx dwarf = NULL_RTX;
17318 rtx tmp, reg;
17319 bool return_in_pc;
17320 int offset_adj;
17321 int emit_update;
17323 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17324 offset_adj = return_in_pc ? 1 : 0;
17325 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17326 if (saved_regs_mask & (1 << i))
17327 num_regs++;
17329 gcc_assert (num_regs && num_regs <= 16);
17331 /* If SP is in reglist, then we don't emit SP update insn. */
17332 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
17334 /* The parallel needs to hold num_regs SETs
17335 and one SET for the stack update. */
17336 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
17338 if (return_in_pc)
17340 tmp = ret_rtx;
17341 XVECEXP (par, 0, 0) = tmp;
17344 if (emit_update)
17346 /* Increment the stack pointer, based on there being
17347 num_regs 4-byte registers to restore. */
17348 tmp = gen_rtx_SET (VOIDmode,
17349 stack_pointer_rtx,
17350 plus_constant (Pmode,
17351 stack_pointer_rtx,
17352 4 * num_regs));
17353 RTX_FRAME_RELATED_P (tmp) = 1;
17354 XVECEXP (par, 0, offset_adj) = tmp;
17357 /* Now restore every reg, which may include PC. */
17358 for (j = 0, i = 0; j < num_regs; i++)
17359 if (saved_regs_mask & (1 << i))
17361 reg = gen_rtx_REG (SImode, i);
17362 if ((num_regs == 1) && emit_update && !return_in_pc)
17364 /* Emit single load with writeback. */
17365 tmp = gen_frame_mem (SImode,
17366 gen_rtx_POST_INC (Pmode,
17367 stack_pointer_rtx));
17368 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
17369 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17370 return;
17373 tmp = gen_rtx_SET (VOIDmode,
17374 reg,
17375 gen_frame_mem
17376 (SImode,
17377 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
17378 RTX_FRAME_RELATED_P (tmp) = 1;
17379 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
17381 /* We need to maintain a sequence for DWARF info too. As the dwarf
17382 info should not include the PC, skip the PC here. */
17383 if (i != PC_REGNUM)
17384 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17386 j++;
17389 if (return_in_pc)
17390 par = emit_jump_insn (par);
17391 else
17392 par = emit_insn (par);
17394 REG_NOTES (par) = dwarf;
17395 if (!return_in_pc)
17396 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
17397 stack_pointer_rtx, stack_pointer_rtx);
17400 /* Generate and emit an insn pattern that we will recognize as a pop_multi
17401 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
17403 Unfortunately, since this insn does not reflect very well the actual
17404 semantics of the operation, we need to annotate the insn for the benefit
17405 of DWARF2 frame unwind information. */
17406 static void
17407 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
17409 int i, j;
17410 rtx par;
17411 rtx dwarf = NULL_RTX;
17412 rtx tmp, reg;
17414 gcc_assert (num_regs && num_regs <= 32);
17416 /* Workaround ARM10 VFPr1 bug. */
17417 if (num_regs == 2 && !arm_arch6)
17419 if (first_reg == 15)
17420 first_reg--;
17422 num_regs++;
17425 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
17426 there could be up to 32 D-registers to restore.
17427 If there are more than 16 D-registers, make two recursive calls,
17428 each of which emits one pop_multi instruction. */
17429 if (num_regs > 16)
17431 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
17432 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
17433 return;
17436 /* The parallel needs to hold num_regs SETs
17437 and one SET for the stack update. */
17438 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
17440 /* Increment the stack pointer, based on there being
17441 num_regs 8-byte registers to restore. */
17442 tmp = gen_rtx_SET (VOIDmode,
17443 base_reg,
17444 plus_constant (Pmode, base_reg, 8 * num_regs));
17445 RTX_FRAME_RELATED_P (tmp) = 1;
17446 XVECEXP (par, 0, 0) = tmp;
17448 /* Now show every reg that will be restored, using a SET for each. */
17449 for (j = 0, i=first_reg; j < num_regs; i += 2)
17451 reg = gen_rtx_REG (DFmode, i);
17453 tmp = gen_rtx_SET (VOIDmode,
17454 reg,
17455 gen_frame_mem
17456 (DFmode,
17457 plus_constant (Pmode, base_reg, 8 * j)));
17458 RTX_FRAME_RELATED_P (tmp) = 1;
17459 XVECEXP (par, 0, j + 1) = tmp;
17461 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17463 j++;
17466 par = emit_insn (par);
17467 REG_NOTES (par) = dwarf;
17469 /* Make sure the cfa doesn't leave with IP_REGNUM, to allow unwinding from FP. */
17470 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
17472 RTX_FRAME_RELATED_P (par) = 1;
17473 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
17475 else
17476 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
17477 base_reg, base_reg);
17480 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
17481 even number of registers is being popped, multiple LDRD patterns are created
17482 for all register pairs. If an odd number of registers is popped, the last
17483 register is loaded using an LDR pattern. */
17484 static void
17485 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
17487 int num_regs = 0;
17488 int i, j;
17489 rtx par = NULL_RTX;
17490 rtx dwarf = NULL_RTX;
17491 rtx tmp, reg, tmp1;
17492 bool return_in_pc;
17494 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17495 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17496 if (saved_regs_mask & (1 << i))
17497 num_regs++;
17499 gcc_assert (num_regs && num_regs <= 16);
17501 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
17502 to be popped. So, if num_regs is even, now it will become odd,
17503 and we can generate pop with PC. If num_regs is odd, it will be
17504 even now, and ldr with return can be generated for PC. */
17505 if (return_in_pc)
17506 num_regs--;
17508 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17510 /* Var j iterates over all the registers to gather all the registers in
17511 saved_regs_mask. Var i gives index of saved registers in stack frame.
17512 A PARALLEL RTX of register-pair is created here, so that pattern for
17513 LDRD can be matched. As PC is always last register to be popped, and
17514 we have already decremented num_regs if PC, we don't have to worry
17515 about PC in this loop. */
17516 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
17517 if (saved_regs_mask & (1 << j))
17519 /* Create RTX for memory load. */
17520 reg = gen_rtx_REG (SImode, j);
17521 tmp = gen_rtx_SET (SImode,
17522 reg,
17523 gen_frame_mem (SImode,
17524 plus_constant (Pmode,
17525 stack_pointer_rtx, 4 * i)));
17526 RTX_FRAME_RELATED_P (tmp) = 1;
17528 if (i % 2 == 0)
17530 /* When saved-register index (i) is even, the RTX to be emitted is
17531 yet to be created. Hence create it first. The LDRD pattern we
17532 are generating is :
17533 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
17534 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
17535 where target registers need not be consecutive. */
17536 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17537 dwarf = NULL_RTX;
17540 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
17541 added as 0th element and if i is odd, reg_i is added as 1st element
17542 of LDRD pattern shown above. */
17543 XVECEXP (par, 0, (i % 2)) = tmp;
17544 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17546 if ((i % 2) == 1)
17548 /* When saved-register index (i) is odd, RTXs for both the registers
17549 to be loaded are generated in above given LDRD pattern, and the
17550 pattern can be emitted now. */
17551 par = emit_insn (par);
17552 REG_NOTES (par) = dwarf;
17553 RTX_FRAME_RELATED_P (par) = 1;
17556 i++;
17559 /* If the number of registers pushed is odd and return_in_pc is false, or the
17560 number of registers is even and return_in_pc is true, the last register is
17561 popped using LDR. It can be PC as well. Hence, adjust the stack first and
17562 then use LDR with post increment. */
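/* For example (illustrative only): popping {r4, r5, r6, r7, pc} emits the pairs
     ldrd  r4, r5, [sp]
     ldrd  r6, r7, [sp, #8]
   in the loop above, then the code below adjusts the stack with
     add   sp, sp, #16
   and finally loads PC with a post-incremented LDR that also returns:
     ldr   pc, [sp], #4  */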
17564 /* Increment the stack pointer, based on there being
17565 num_regs 4-byte registers to restore. */
17566 tmp = gen_rtx_SET (VOIDmode,
17567 stack_pointer_rtx,
17568 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
17569 RTX_FRAME_RELATED_P (tmp) = 1;
17570 tmp = emit_insn (tmp);
17571 if (!return_in_pc)
17573 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
17574 stack_pointer_rtx, stack_pointer_rtx);
17577 dwarf = NULL_RTX;
17579 if (((num_regs % 2) == 1 && !return_in_pc)
17580 || ((num_regs % 2) == 0 && return_in_pc))
17582 /* Scan for the single register to be popped. Skip until the saved
17583 register is found. */
17584 for (; (saved_regs_mask & (1 << j)) == 0; j++);
17586 /* Gen LDR with post increment here. */
17587 tmp1 = gen_rtx_MEM (SImode,
17588 gen_rtx_POST_INC (SImode,
17589 stack_pointer_rtx));
17590 set_mem_alias_set (tmp1, get_frame_alias_set ());
17592 reg = gen_rtx_REG (SImode, j);
17593 tmp = gen_rtx_SET (SImode, reg, tmp1);
17594 RTX_FRAME_RELATED_P (tmp) = 1;
17595 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17597 if (return_in_pc)
17599 /* If return_in_pc, j must be PC_REGNUM. */
17600 gcc_assert (j == PC_REGNUM);
17601 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17602 XVECEXP (par, 0, 0) = ret_rtx;
17603 XVECEXP (par, 0, 1) = tmp;
17604 par = emit_jump_insn (par);
17606 else
17608 par = emit_insn (tmp);
17609 REG_NOTES (par) = dwarf;
17610 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17611 stack_pointer_rtx, stack_pointer_rtx);
17615 else if ((num_regs % 2) == 1 && return_in_pc)
17617 /* There are 2 registers to be popped. So, generate the pattern
17618 pop_multiple_with_stack_update_and_return to pop in PC. */
17619 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
17622 return;
17625 /* LDRD in ARM mode needs consecutive registers as operands. This function
17626 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
17627 offset addressing and then generates one separate stack update. This provides
17628 more scheduling freedom, compared to writeback on every load. However,
17629 if the function returns using load into PC directly
17630 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
17631 before the last load. TODO: Add a peephole optimization to recognize
17632 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
17633 peephole optimization to merge the load at stack-offset zero
17634 with the stack update instruction using load with writeback
17635 in post-index addressing mode. */
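/* For example (illustrative only): with SAVED_REGS_MASK covering {r4, r5, r6}
   this emits roughly
     ldrd  r4, r5, [sp]
     ldr   r6, [sp, #8]
     add   sp, sp, #12
   i.e. offset addressing for the loads and a single stack update at the end.  */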
17636 static void
17637 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
17639 int j = 0;
17640 int offset = 0;
17641 rtx par = NULL_RTX;
17642 rtx dwarf = NULL_RTX;
17643 rtx tmp, mem;
17645 /* Restore saved registers. */
17646 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
17647 j = 0;
17648 while (j <= LAST_ARM_REGNUM)
17649 if (saved_regs_mask & (1 << j))
17651 if ((j % 2) == 0
17652 && (saved_regs_mask & (1 << (j + 1)))
17653 && (j + 1) != PC_REGNUM)
17655 /* Current register and next register form register pair for which
17656 LDRD can be generated. PC is always the last register popped, and
17657 we handle it separately. */
17658 if (offset > 0)
17659 mem = gen_frame_mem (DImode,
17660 plus_constant (Pmode,
17661 stack_pointer_rtx,
17662 offset));
17663 else
17664 mem = gen_frame_mem (DImode, stack_pointer_rtx);
17666 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
17667 tmp = emit_insn (tmp);
17668 RTX_FRAME_RELATED_P (tmp) = 1;
17670 /* Generate dwarf info. */
17672 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17673 gen_rtx_REG (SImode, j),
17674 NULL_RTX);
17675 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17676 gen_rtx_REG (SImode, j + 1),
17677 dwarf);
17679 REG_NOTES (tmp) = dwarf;
17681 offset += 8;
17682 j += 2;
17684 else if (j != PC_REGNUM)
17686 /* Emit a single word load. */
17687 if (offset > 0)
17688 mem = gen_frame_mem (SImode,
17689 plus_constant (Pmode,
17690 stack_pointer_rtx,
17691 offset));
17692 else
17693 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17695 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
17696 tmp = emit_insn (tmp);
17697 RTX_FRAME_RELATED_P (tmp) = 1;
17699 /* Generate dwarf info. */
17700 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
17701 gen_rtx_REG (SImode, j),
17702 NULL_RTX);
17704 offset += 4;
17705 j += 1;
17707 else /* j == PC_REGNUM */
17708 j++;
17710 else
17711 j++;
17713 /* Update the stack. */
17714 if (offset > 0)
17716 tmp = gen_rtx_SET (Pmode,
17717 stack_pointer_rtx,
17718 plus_constant (Pmode,
17719 stack_pointer_rtx,
17720 offset));
17721 tmp = emit_insn (tmp);
17722 arm_add_cfa_adjust_cfa_note (tmp, offset,
17723 stack_pointer_rtx, stack_pointer_rtx);
17724 offset = 0;
17727 if (saved_regs_mask & (1 << PC_REGNUM))
17729 /* Only PC is to be popped. */
17730 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17731 XVECEXP (par, 0, 0) = ret_rtx;
17732 tmp = gen_rtx_SET (SImode,
17733 gen_rtx_REG (SImode, PC_REGNUM),
17734 gen_frame_mem (SImode,
17735 gen_rtx_POST_INC (SImode,
17736 stack_pointer_rtx)));
17737 RTX_FRAME_RELATED_P (tmp) = 1;
17738 XVECEXP (par, 0, 1) = tmp;
17739 par = emit_jump_insn (par);
17741 /* Generate dwarf info. */
17742 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17743 gen_rtx_REG (SImode, PC_REGNUM),
17744 NULL_RTX);
17745 REG_NOTES (par) = dwarf;
17746 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17747 stack_pointer_rtx, stack_pointer_rtx);
17751 /* Calculate the size of the return value that is passed in registers. */
17752 static unsigned
17753 arm_size_return_regs (void)
17755 enum machine_mode mode;
17757 if (crtl->return_rtx != 0)
17758 mode = GET_MODE (crtl->return_rtx);
17759 else
17760 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17762 return GET_MODE_SIZE (mode);
17765 /* Return true if the current function needs to save/restore LR. */
17766 static bool
17767 thumb_force_lr_save (void)
17769 return !cfun->machine->lr_save_eliminated
17770 && (!leaf_function_p ()
17771 || thumb_far_jump_used_p ()
17772 || df_regs_ever_live_p (LR_REGNUM));
17775 /* We do not know whether r3 will be available, because
17776 we have an indirect tail call happening in this
17777 particular case. */
17778 static bool
17779 is_indirect_tailcall_p (rtx call)
17781 rtx pat = PATTERN (call);
17783 /* Indirect tail call. */
17784 pat = XVECEXP (pat, 0, 0);
17785 if (GET_CODE (pat) == SET)
17786 pat = SET_SRC (pat);
17788 pat = XEXP (XEXP (pat, 0), 0);
17789 return REG_P (pat);
17792 /* Return true if r3 is used by any of the tail call insns in the
17793 current function. */
17794 static bool
17795 any_sibcall_could_use_r3 (void)
17797 edge_iterator ei;
17798 edge e;
17800 if (!crtl->tail_call_emit)
17801 return false;
17802 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17803 if (e->flags & EDGE_SIBCALL)
17805 rtx call = BB_END (e->src);
17806 if (!CALL_P (call))
17807 call = prev_nonnote_nondebug_insn (call);
17808 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
17809 if (find_regno_fusage (call, USE, 3)
17810 || is_indirect_tailcall_p (call))
17811 return true;
17813 return false;
17817 /* Compute the distance from register FROM to register TO.
17818 These can be the arg pointer (26), the soft frame pointer (25),
17819 the stack pointer (13) or the hard frame pointer (11).
17820 In thumb mode r7 is used as the soft frame pointer, if needed.
17821 Typical stack layout looks like this:
17823 old stack pointer -> | |
17824 ----
17825 | | \
17826 | | saved arguments for
17827 | | vararg functions
17828 | | /
17830 hard FP & arg pointer -> | | \
17831 | | stack
17832 | | frame
17833 | | /
17835 | | \
17836 | | call saved
17837 | | registers
17838 soft frame pointer -> | | /
17840 | | \
17841 | | local
17842 | | variables
17843 locals base pointer -> | | /
17845 | | \
17846 | | outgoing
17847 | | arguments
17848 current stack pointer -> | | /
17851 For a given function some or all of these stack components
17852 may not be needed, giving rise to the possibility of
17853 eliminating some of the registers.
17855 The values returned by this function must reflect the behavior
17856 of arm_expand_prologue() and arm_compute_save_reg_mask().
17858 The sign of the number returned reflects the direction of stack
17859 growth, so the values are positive for all eliminations except
17860 from the soft frame pointer to the hard frame pointer.
17862 SFP may point just inside the local variables block to ensure correct
17863 alignment. */
17866 /* Calculate stack offsets. These are used to calculate register elimination
17867 offsets and in prologue/epilogue code. Also calculates which registers
17868 should be saved. */
17870 static arm_stack_offsets *
17871 arm_get_frame_offsets (void)
17873 struct arm_stack_offsets *offsets;
17874 unsigned long func_type;
17875 int leaf;
17876 int saved;
17877 int core_saved;
17878 HOST_WIDE_INT frame_size;
17879 int i;
17881 offsets = &cfun->machine->stack_offsets;
17883 /* We need to know if we are a leaf function. Unfortunately, it
17884 is possible to be called after start_sequence has been called,
17885 which causes get_insns to return the insns for the sequence,
17886 not the function, which will cause leaf_function_p to return
17887 the incorrect result.
17889 We only need to know about leaf functions once reload has completed, and the
17890 frame size cannot be changed after that time, so we can safely
17891 use the cached value. */
17893 if (reload_completed)
17894 return offsets;
17896 /* Initially this is the size of the local variables. It will be translated
17897 into an offset once we have determined the size of preceding data. */
17898 frame_size = ROUND_UP_WORD (get_frame_size ());
17900 leaf = leaf_function_p ();
17902 /* Space for variadic functions. */
17903 offsets->saved_args = crtl->args.pretend_args_size;
17905 /* In Thumb mode this is incorrect, but never used. */
17906 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
17907 arm_compute_static_chain_stack_bytes();
17909 if (TARGET_32BIT)
17911 unsigned int regno;
17913 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
17914 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17915 saved = core_saved;
17917 /* We know that SP will be doubleword aligned on entry, and we must
17918 preserve that condition at any subroutine call. We also require the
17919 soft frame pointer to be doubleword aligned. */
17921 if (TARGET_REALLY_IWMMXT)
17923 /* Check for the call-saved iWMMXt registers. */
17924 for (regno = FIRST_IWMMXT_REGNUM;
17925 regno <= LAST_IWMMXT_REGNUM;
17926 regno++)
17927 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
17928 saved += 8;
17931 func_type = arm_current_func_type ();
17932 /* Space for saved VFP registers. */
17933 if (! IS_VOLATILE (func_type)
17934 && TARGET_HARD_FLOAT && TARGET_VFP)
17935 saved += arm_get_vfp_saved_size ();
17937 else /* TARGET_THUMB1 */
17939 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
17940 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17941 saved = core_saved;
17942 if (TARGET_BACKTRACE)
17943 saved += 16;
17946 /* Saved registers include the stack frame. */
17947 offsets->saved_regs = offsets->saved_args + saved +
17948 arm_compute_static_chain_stack_bytes();
17949 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17950 /* A leaf function does not need any stack alignment if it has nothing
17951 on the stack. */
17952 if (leaf && frame_size == 0
17953 /* However if it calls alloca(), we have a dynamically allocated
17954 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
17955 && ! cfun->calls_alloca)
17957 offsets->outgoing_args = offsets->soft_frame;
17958 offsets->locals_base = offsets->soft_frame;
17959 return offsets;
17962 /* Ensure SFP has the correct alignment. */
17963 if (ARM_DOUBLEWORD_ALIGN
17964 && (offsets->soft_frame & 7))
17966 offsets->soft_frame += 4;
17967 /* Try to align stack by pushing an extra reg. Don't bother doing this
17968 when there is a stack frame as the alignment will be rolled into
17969 the normal stack adjustment. */
17970 if (frame_size + crtl->outgoing_args_size == 0)
17972 int reg = -1;
17974 /* If it is safe to use r3, then do so. This sometimes
17975 generates better code on Thumb-2 by avoiding the need to
17976 use 32-bit push/pop instructions. */
17977 if (! any_sibcall_could_use_r3 ()
17978 && arm_size_return_regs () <= 12
17979 && (offsets->saved_regs_mask & (1 << 3)) == 0
17980 && (TARGET_THUMB2
17981 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
17983 reg = 3;
17985 else
17986 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17988 /* Avoid fixed registers; they may be changed at
17989 arbitrary times so it's unsafe to restore them
17990 during the epilogue. */
17991 if (!fixed_regs[i]
17992 && (offsets->saved_regs_mask & (1 << i)) == 0)
17994 reg = i;
17995 break;
17999 if (reg != -1)
18001 offsets->saved_regs += 4;
18002 offsets->saved_regs_mask |= (1 << reg);
18007 offsets->locals_base = offsets->soft_frame + frame_size;
18008 offsets->outgoing_args = (offsets->locals_base
18009 + crtl->outgoing_args_size);
18011 if (ARM_DOUBLEWORD_ALIGN)
18013 /* Ensure SP remains doubleword aligned. */
18014 if (offsets->outgoing_args & 7)
18015 offsets->outgoing_args += 4;
18016 gcc_assert (!(offsets->outgoing_args & 7));
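/* For instance, an outgoing_args offset of 52 bytes is bumped to 56 here,
   keeping SP doubleword (8-byte) aligned after the prologue.  */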
18019 return offsets;
18023 /* Calculate the relative offsets for the different stack pointers. Positive
18024 offsets are in the direction of stack growth. */
18026 HOST_WIDE_INT
18027 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
18029 arm_stack_offsets *offsets;
18031 offsets = arm_get_frame_offsets ();
18033 /* OK, now we have enough information to compute the distances.
18034 There must be an entry in these switch tables for each pair
18035 of registers in ELIMINABLE_REGS, even if some of the entries
18036 seem to be redundant or useless. */
18037 switch (from)
18039 case ARG_POINTER_REGNUM:
18040 switch (to)
18042 case THUMB_HARD_FRAME_POINTER_REGNUM:
18043 return 0;
18045 case FRAME_POINTER_REGNUM:
18046 /* This is the reverse of the soft frame pointer
18047 to hard frame pointer elimination below. */
18048 return offsets->soft_frame - offsets->saved_args;
18050 case ARM_HARD_FRAME_POINTER_REGNUM:
18051 /* This is only non-zero in the case where the static chain register
18052 is stored above the frame. */
18053 return offsets->frame - offsets->saved_args - 4;
18055 case STACK_POINTER_REGNUM:
18056 /* If nothing has been pushed on the stack at all
18057 then this will return -4. This *is* correct! */
18058 return offsets->outgoing_args - (offsets->saved_args + 4);
18060 default:
18061 gcc_unreachable ();
18063 gcc_unreachable ();
18065 case FRAME_POINTER_REGNUM:
18066 switch (to)
18068 case THUMB_HARD_FRAME_POINTER_REGNUM:
18069 return 0;
18071 case ARM_HARD_FRAME_POINTER_REGNUM:
18072 /* The hard frame pointer points to the top entry in the
18073 stack frame. The soft frame pointer to the bottom entry
18074 in the stack frame. If there is no stack frame at all,
18075 then they are identical. */
18077 return offsets->frame - offsets->soft_frame;
18079 case STACK_POINTER_REGNUM:
18080 return offsets->outgoing_args - offsets->soft_frame;
18082 default:
18083 gcc_unreachable ();
18085 gcc_unreachable ();
18087 default:
18088 /* You cannot eliminate from the stack pointer.
18089 In theory you could eliminate from the hard frame
18090 pointer to the stack pointer, but this will never
18091 happen, since if a stack frame is not needed the
18092 hard frame pointer will never be used. */
18093 gcc_unreachable ();
18097 /* Given FROM and TO register numbers, say whether this elimination is
18098 allowed. Frame pointer elimination is automatically handled.
18100 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
18101 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
18102 pointer, we must eliminate FRAME_POINTER_REGNUM into
18103 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
18104 ARG_POINTER_REGNUM. */
18106 bool
18107 arm_can_eliminate (const int from, const int to)
18109 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
18110 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
18111 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
18112 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
18113 true);
18116 /* Emit RTL to save coprocessor registers on function entry. Returns the
18117 number of bytes pushed. */
18119 static int
18120 arm_save_coproc_regs(void)
18122 int saved_size = 0;
18123 unsigned reg;
18124 unsigned start_reg;
18125 rtx insn;
18127 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
18128 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18130 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
18131 insn = gen_rtx_MEM (V2SImode, insn);
18132 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
18133 RTX_FRAME_RELATED_P (insn) = 1;
18134 saved_size += 8;
18137 if (TARGET_HARD_FLOAT && TARGET_VFP)
18139 start_reg = FIRST_VFP_REGNUM;
18141 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
18143 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
18144 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
18146 if (start_reg != reg)
18147 saved_size += vfp_emit_fstmd (start_reg,
18148 (reg - start_reg) / 2);
18149 start_reg = reg + 2;
18152 if (start_reg != reg)
18153 saved_size += vfp_emit_fstmd (start_reg,
18154 (reg - start_reg) / 2);
18156 return saved_size;
18160 /* Set the Thumb frame pointer from the stack pointer. */
18162 static void
18163 thumb_set_frame_pointer (arm_stack_offsets *offsets)
18165 HOST_WIDE_INT amount;
18166 rtx insn, dwarf;
18168 amount = offsets->outgoing_args - offsets->locals_base;
18169 if (amount < 1024)
18170 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18171 stack_pointer_rtx, GEN_INT (amount)));
18172 else
18174 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
18175 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
18176 expects the first two operands to be the same. */
18177 if (TARGET_THUMB2)
18179 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18180 stack_pointer_rtx,
18181 hard_frame_pointer_rtx));
18183 else
18185 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18186 hard_frame_pointer_rtx,
18187 stack_pointer_rtx));
18189 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
18190 plus_constant (Pmode, stack_pointer_rtx, amount));
18191 RTX_FRAME_RELATED_P (dwarf) = 1;
18192 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18195 RTX_FRAME_RELATED_P (insn) = 1;
18198 /* Generate the prologue instructions for entry into an ARM or Thumb-2
18199 function. */
18200 void
18201 arm_expand_prologue (void)
18203 rtx amount;
18204 rtx insn;
18205 rtx ip_rtx;
18206 unsigned long live_regs_mask;
18207 unsigned long func_type;
18208 int fp_offset = 0;
18209 int saved_pretend_args = 0;
18210 int saved_regs = 0;
18211 unsigned HOST_WIDE_INT args_to_push;
18212 arm_stack_offsets *offsets;
18214 func_type = arm_current_func_type ();
18216 /* Naked functions don't have prologues. */
18217 if (IS_NAKED (func_type))
18218 return;
18220 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
18221 args_to_push = crtl->args.pretend_args_size;
18223 /* Compute which registers we will have to save onto the stack. */
18224 offsets = arm_get_frame_offsets ();
18225 live_regs_mask = offsets->saved_regs_mask;
18227 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
18229 if (IS_STACKALIGN (func_type))
18231 rtx r0, r1;
18233 /* Handle a word-aligned stack pointer. We generate the following:
18235 mov r0, sp
18236 bic r1, r0, #7
18237 mov sp, r1
18238 <save and restore r0 in normal prologue/epilogue>
18239 mov sp, r0
18240 bx lr
18242 The unwinder doesn't need to know about the stack realignment.
18243 Just tell it we saved SP in r0. */
18244 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
18246 r0 = gen_rtx_REG (SImode, 0);
18247 r1 = gen_rtx_REG (SImode, 1);
18249 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
18250 RTX_FRAME_RELATED_P (insn) = 1;
18251 add_reg_note (insn, REG_CFA_REGISTER, NULL);
18253 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
18255 /* ??? The CFA changes here, which may cause GDB to conclude that it
18256 has entered a different function. That said, the unwind info is
18257 correct, individually, before and after this instruction because
18258 we've described the save of SP, which will override the default
18259 handling of SP as restoring from the CFA. */
18260 emit_insn (gen_movsi (stack_pointer_rtx, r1));
18263 /* For APCS frames, if IP register is clobbered
18264 when creating frame, save that register in a special
18265 way. */
18266 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18268 if (IS_INTERRUPT (func_type))
18270 /* Interrupt functions must not corrupt any registers.
18271 Creating a frame pointer however, corrupts the IP
18272 register, so we must push it first. */
18273 emit_multi_reg_push (1 << IP_REGNUM);
18275 /* Do not set RTX_FRAME_RELATED_P on this insn.
18276 The dwarf stack unwinding code only wants to see one
18277 stack decrement per function, and this is not it. If
18278 this instruction is labeled as being part of the frame
18279 creation sequence then dwarf2out_frame_debug_expr will
18280 die when it encounters the assignment of IP to FP
18281 later on, since the use of SP here establishes SP as
18282 the CFA register and not IP.
18284 Anyway this instruction is not really part of the stack
18285 frame creation although it is part of the prologue. */
18287 else if (IS_NESTED (func_type))
18289 /* The Static chain register is the same as the IP register
18290 used as a scratch register during stack frame creation.
18291 To get around this we need to find somewhere to store IP
18292 whilst the frame is being created. We try the following
18293 places in order:
18295 1. The last argument register.
18296 2. A slot on the stack above the frame. (This only
18297 works if the function is not a varargs function).
18298 3. Register r3, after pushing the argument registers
18299 onto the stack.
18301 Note - we only need to tell the dwarf2 backend about the SP
18302 adjustment in the second variant; the static chain register
18303 doesn't need to be unwound, as it doesn't contain a value
18304 inherited from the caller. */
18306 if (df_regs_ever_live_p (3) == false)
18307 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18308 else if (args_to_push == 0)
18310 rtx dwarf;
18312 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
18313 saved_regs += 4;
18315 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
18316 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
18317 fp_offset = 4;
18319 /* Just tell the dwarf backend that we adjusted SP. */
18320 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18321 plus_constant (Pmode, stack_pointer_rtx,
18322 -fp_offset));
18323 RTX_FRAME_RELATED_P (insn) = 1;
18324 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18326 else
18328 /* Store the args on the stack. */
18329 if (cfun->machine->uses_anonymous_args)
18330 insn = emit_multi_reg_push
18331 ((0xf0 >> (args_to_push / 4)) & 0xf);
18332 else
18333 insn = emit_insn
18334 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18335 GEN_INT (- args_to_push)));
18337 RTX_FRAME_RELATED_P (insn) = 1;
18339 saved_pretend_args = 1;
18340 fp_offset = args_to_push;
18341 args_to_push = 0;
18343 /* Now reuse r3 to preserve IP. */
18344 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18348 insn = emit_set_insn (ip_rtx,
18349 plus_constant (Pmode, stack_pointer_rtx,
18350 fp_offset));
18351 RTX_FRAME_RELATED_P (insn) = 1;
18354 if (args_to_push)
18356 /* Push the argument registers, or reserve space for them. */
18357 if (cfun->machine->uses_anonymous_args)
18358 insn = emit_multi_reg_push
18359 ((0xf0 >> (args_to_push / 4)) & 0xf);
18360 else
18361 insn = emit_insn
18362 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18363 GEN_INT (- args_to_push)));
18364 RTX_FRAME_RELATED_P (insn) = 1;
18367 /* If this is an interrupt service routine, and the link register
18368 is going to be pushed, and we're not generating an extra
18369 push of IP (needed when a frame is needed and the APCS frame layout is used),
18370 subtracting four from LR now will mean that the function return
18371 can be done with a single instruction. */
18372 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
18373 && (live_regs_mask & (1 << LR_REGNUM)) != 0
18374 && !(frame_pointer_needed && TARGET_APCS_FRAME)
18375 && TARGET_ARM)
18377 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
18379 emit_set_insn (lr, plus_constant (SImode, lr, -4));
18382 if (live_regs_mask)
18384 saved_regs += bit_count (live_regs_mask) * 4;
18385 if (optimize_size && !frame_pointer_needed
18386 && saved_regs == offsets->saved_regs - offsets->saved_args)
18388 /* If no coprocessor registers are being pushed and we don't have
18389 to worry about a frame pointer then push extra registers to
18390 create the stack frame. This is done in a way that does not
18391 alter the frame layout, so is independent of the epilogue. */
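/* For example (illustrative only): if the frame needs 8 more bytes and the
   lowest register in LIVE_REGS_MASK is r4, then r0 and r1 are added to the
   push mask; the extra pushes create the frame space and avoid a separate
   SP subtraction here (and addition in the epilogue).  */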
18392 int n;
18393 int frame;
18394 n = 0;
18395 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
18396 n++;
18397 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
18398 if (frame && n * 4 >= frame)
18400 n = frame / 4;
18401 live_regs_mask |= (1 << n) - 1;
18402 saved_regs += frame;
18406 if (TARGET_LDRD
18407 && current_tune->prefer_ldrd_strd
18408 && !optimize_function_for_size_p (cfun))
18410 if (TARGET_THUMB2)
18412 thumb2_emit_strd_push (live_regs_mask);
18414 else if (TARGET_ARM
18415 && !TARGET_APCS_FRAME
18416 && !IS_INTERRUPT (func_type))
18418 arm_emit_strd_push (live_regs_mask);
18420 else
18422 insn = emit_multi_reg_push (live_regs_mask);
18423 RTX_FRAME_RELATED_P (insn) = 1;
18426 else
18428 insn = emit_multi_reg_push (live_regs_mask);
18429 RTX_FRAME_RELATED_P (insn) = 1;
18433 if (! IS_VOLATILE (func_type))
18434 saved_regs += arm_save_coproc_regs ();
18436 if (frame_pointer_needed && TARGET_ARM)
18438 /* Create the new frame pointer. */
18439 if (TARGET_APCS_FRAME)
18441 insn = GEN_INT (-(4 + args_to_push + fp_offset));
18442 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
18443 RTX_FRAME_RELATED_P (insn) = 1;
18445 if (IS_NESTED (func_type))
18447 /* Recover the static chain register. */
18448 if (!df_regs_ever_live_p (3)
18449 || saved_pretend_args)
18450 insn = gen_rtx_REG (SImode, 3);
18451 else /* if (crtl->args.pretend_args_size == 0) */
18453 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
18454 insn = gen_frame_mem (SImode, insn);
18456 emit_set_insn (ip_rtx, insn);
18457 /* Add a USE to stop propagate_one_insn() from barfing. */
18458 emit_insn (gen_force_register_use (ip_rtx));
18461 else
18463 insn = GEN_INT (saved_regs - 4);
18464 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18465 stack_pointer_rtx, insn));
18466 RTX_FRAME_RELATED_P (insn) = 1;
18470 if (flag_stack_usage_info)
18471 current_function_static_stack_size
18472 = offsets->outgoing_args - offsets->saved_args;
18474 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
18476 /* This add can produce multiple insns for a large constant, so we
18477 need to get tricky. */
18478 rtx last = get_last_insn ();
18480 amount = GEN_INT (offsets->saved_args + saved_regs
18481 - offsets->outgoing_args);
18483 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18484 amount));
18487 last = last ? NEXT_INSN (last) : get_insns ();
18488 RTX_FRAME_RELATED_P (last) = 1;
18490 while (last != insn);
18492 /* If the frame pointer is needed, emit a special barrier that
18493 will prevent the scheduler from moving stores to the frame
18494 before the stack adjustment. */
18495 if (frame_pointer_needed)
18496 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
18497 hard_frame_pointer_rtx));
18501 if (frame_pointer_needed && TARGET_THUMB2)
18502 thumb_set_frame_pointer (offsets);
18504 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18506 unsigned long mask;
18508 mask = live_regs_mask;
18509 mask &= THUMB2_WORK_REGS;
18510 if (!IS_NESTED (func_type))
18511 mask |= (1 << IP_REGNUM);
18512 arm_load_pic_register (mask);
18515 /* If we are profiling, make sure no instructions are scheduled before
18516 the call to mcount. Similarly if the user has requested no
18517 scheduling in the prolog. Similarly if we want non-call exceptions
18518 using the EABI unwinder, to prevent faulting instructions from being
18519 swapped with a stack adjustment. */
18520 if (crtl->profile || !TARGET_SCHED_PROLOG
18521 || (arm_except_unwind_info (&global_options) == UI_TARGET
18522 && cfun->can_throw_non_call_exceptions))
18523 emit_insn (gen_blockage ());
18525 /* If the link register is being kept alive, with the return address in it,
18526 then make sure that it does not get reused by the ce2 pass. */
18527 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
18528 cfun->machine->lr_save_eliminated = 1;
18531 /* Print condition code to STREAM. Helper function for arm_print_operand. */
18532 static void
18533 arm_print_condition (FILE *stream)
18535 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
18537 /* Branch conversion is not implemented for Thumb-2. */
18538 if (TARGET_THUMB)
18540 output_operand_lossage ("predicated Thumb instruction");
18541 return;
18543 if (current_insn_predicate != NULL)
18545 output_operand_lossage
18546 ("predicated instruction in conditional sequence");
18547 return;
18550 fputs (arm_condition_codes[arm_current_cc], stream);
18552 else if (current_insn_predicate)
18554 enum arm_cond_code code;
18556 if (TARGET_THUMB1)
18558 output_operand_lossage ("predicated Thumb instruction");
18559 return;
18562 code = get_arm_condition_code (current_insn_predicate);
18563 fputs (arm_condition_codes[code], stream);
18568 /* If CODE is 'd', then the X is a condition operand and the instruction
18569 should only be executed if the condition is true.
18570 if CODE is 'D', then the X is a condition operand and the instruction
18571 should only be executed if the condition is false: however, if the mode
18572 of the comparison is CCFPEmode, then always execute the instruction -- we
18573 do this because in these circumstances !GE does not necessarily imply LT;
18574 in these cases the instruction pattern will take care to make sure that
18575 an instruction containing %d will follow, thereby undoing the effects of
18576 doing this instruction unconditionally.
18577 If CODE is 'N' then X is a floating point operand that must be negated
18578 before output.
18579 If CODE is 'B' then output a bitwise inverted value of X (a const int).
18580 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
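/* For example (illustrative only): "%B" on the constant 5 prints -6 (the
   bitwise inverse, sign-extended), and "%M" on a DImode value held in r0
   prints "{r0-r1}".  */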
18581 static void
18582 arm_print_operand (FILE *stream, rtx x, int code)
18584 switch (code)
18586 case '@':
18587 fputs (ASM_COMMENT_START, stream);
18588 return;
18590 case '_':
18591 fputs (user_label_prefix, stream);
18592 return;
18594 case '|':
18595 fputs (REGISTER_PREFIX, stream);
18596 return;
18598 case '?':
18599 arm_print_condition (stream);
18600 return;
18602 case '(':
18603 /* Nothing in unified syntax, otherwise the current condition code. */
18604 if (!TARGET_UNIFIED_ASM)
18605 arm_print_condition (stream);
18606 break;
18608 case ')':
18609 /* The current condition code in unified syntax, otherwise nothing. */
18610 if (TARGET_UNIFIED_ASM)
18611 arm_print_condition (stream);
18612 break;
18614 case '.':
18615 /* The current condition code for a condition code setting instruction.
18616 Preceded by 's' in unified syntax, otherwise followed by 's'. */
18617 if (TARGET_UNIFIED_ASM)
18619 fputc('s', stream);
18620 arm_print_condition (stream);
18622 else
18624 arm_print_condition (stream);
18625 fputc('s', stream);
18627 return;
18629 case '!':
18630 /* If the instruction is conditionally executed then print
18631 the current condition code, otherwise print 's'. */
18632 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
18633 if (current_insn_predicate)
18634 arm_print_condition (stream);
18635 else
18636 fputc('s', stream);
18637 break;
18639 /* %# is a "break" sequence. It doesn't output anything, but is used to
18640 separate e.g. operand numbers from following text, if that text consists
18641 of further digits which we don't want to be part of the operand
18642 number. */
18643 case '#':
18644 return;
18646 case 'N':
18648 REAL_VALUE_TYPE r;
18649 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
18650 r = real_value_negate (&r);
18651 fprintf (stream, "%s", fp_const_from_val (&r));
18653 return;
18655 /* An integer or symbol address without a preceding # sign. */
18656 case 'c':
18657 switch (GET_CODE (x))
18659 case CONST_INT:
18660 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18661 break;
18663 case SYMBOL_REF:
18664 output_addr_const (stream, x);
18665 break;
18667 case CONST:
18668 if (GET_CODE (XEXP (x, 0)) == PLUS
18669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
18671 output_addr_const (stream, x);
18672 break;
18674 /* Fall through. */
18676 default:
18677 output_operand_lossage ("Unsupported operand for code '%c'", code);
18679 return;
18681 /* An integer that we want to print in HEX. */
18682 case 'x':
18683 switch (GET_CODE (x))
18685 case CONST_INT:
18686 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
18687 break;
18689 default:
18690 output_operand_lossage ("Unsupported operand for code '%c'", code);
18692 return;
18694 case 'B':
18695 if (CONST_INT_P (x))
18697 HOST_WIDE_INT val;
18698 val = ARM_SIGN_EXTEND (~INTVAL (x));
18699 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
18701 else
18703 putc ('~', stream);
18704 output_addr_const (stream, x);
18706 return;
18708 case 'L':
18709 /* The low 16 bits of an immediate constant. */
18710 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
18711 return;
18713 case 'i':
18714 fprintf (stream, "%s", arithmetic_instr (x, 1));
18715 return;
18717 case 'I':
18718 fprintf (stream, "%s", arithmetic_instr (x, 0));
18719 return;
18721 case 'S':
18723 HOST_WIDE_INT val;
18724 const char *shift;
18726 shift = shift_op (x, &val);
18728 if (shift)
18730 fprintf (stream, ", %s ", shift);
18731 if (val == -1)
18732 arm_print_operand (stream, XEXP (x, 1), 0);
18733 else
18734 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
18737 return;
18739 /* An explanation of the 'Q', 'R' and 'H' register operands:
18741 In a pair of registers containing a DI or DF value the 'Q'
18742 operand returns the register number of the register containing
18743 the least significant part of the value. The 'R' operand returns
18744 the register number of the register containing the most
18745 significant part of the value.
18747 The 'H' operand returns the higher of the two register numbers.
18748 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
18749 same as the 'Q' operand, since the most significant part of the
18750 value is held in the lower number register. The reverse is true
18751 on systems where WORDS_BIG_ENDIAN is false.
18753 The purpose of these operands is to distinguish between cases
18754 where the endian-ness of the values is important (for example
18755 when they are added together), and cases where the endian-ness
18756 is irrelevant, but the order of register operations is important.
18757 For example when loading a value from memory into a register
18758 pair, the endian-ness does not matter. Provided that the value
18759 from the lower memory address is put into the lower numbered
18760 register, and the value from the higher address is put into the
18761 higher numbered register, the load will work regardless of whether
18762 the value being loaded is big-wordian or little-wordian. The
18763 order of the two register loads can matter however, if the address
18764 of the memory location is actually held in one of the registers
18765 being overwritten by the load.
18767 The 'Q' and 'R' constraints are also available for 64-bit
18768 constants. */
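/* For example (illustrative only): for a DImode value held in r2/r3 on a
   little-endian target, '%Q' prints r2 (the least significant half), '%R'
   prints r3 (the most significant half), and '%H' prints r3 (the higher
   register number) regardless of endianness.  */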
18769 case 'Q':
18770 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18772 rtx part = gen_lowpart (SImode, x);
18773 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18774 return;
18777 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18779 output_operand_lossage ("invalid operand for code '%c'", code);
18780 return;
18783 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
18784 return;
18786 case 'R':
18787 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18789 enum machine_mode mode = GET_MODE (x);
18790 rtx part;
18792 if (mode == VOIDmode)
18793 mode = DImode;
18794 part = gen_highpart_mode (SImode, mode, x);
18795 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18796 return;
18799 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18801 output_operand_lossage ("invalid operand for code '%c'", code);
18802 return;
18805 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
18806 return;
18808 case 'H':
18809 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18811 output_operand_lossage ("invalid operand for code '%c'", code);
18812 return;
18815 asm_fprintf (stream, "%r", REGNO (x) + 1);
18816 return;
18818 case 'J':
18819 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18821 output_operand_lossage ("invalid operand for code '%c'", code);
18822 return;
18825 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
18826 return;
18828 case 'K':
18829 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18831 output_operand_lossage ("invalid operand for code '%c'", code);
18832 return;
18835 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
18836 return;
18838 case 'm':
18839 asm_fprintf (stream, "%r",
18840 REG_P (XEXP (x, 0))
18841 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
18842 return;
18844 case 'M':
18845 asm_fprintf (stream, "{%r-%r}",
18846 REGNO (x),
18847 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
18848 return;
18850 /* Like 'M', but writing doubleword vector registers, for use by Neon
18851 insns. */
18852 case 'h':
18854 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
18855 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
18856 if (numregs == 1)
18857 asm_fprintf (stream, "{d%d}", regno);
18858 else
18859 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
18861 return;
18863 case 'd':
18864 /* CONST_TRUE_RTX means always -- that's the default. */
18865 if (x == const_true_rtx)
18866 return;
18868 if (!COMPARISON_P (x))
18870 output_operand_lossage ("invalid operand for code '%c'", code);
18871 return;
18874 fputs (arm_condition_codes[get_arm_condition_code (x)],
18875 stream);
18876 return;
18878 case 'D':
18879 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
18880 want to do that. */
18881 if (x == const_true_rtx)
18883 output_operand_lossage ("instruction never executed");
18884 return;
18886 if (!COMPARISON_P (x))
18888 output_operand_lossage ("invalid operand for code '%c'", code);
18889 return;
18892 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
18893 (get_arm_condition_code (x))],
18894 stream);
18895 return;
18897 case 's':
18898 case 'V':
18899 case 'W':
18900 case 'X':
18901 case 'Y':
18902 case 'Z':
18903 /* Former Maverick support, removed after GCC-4.7. */
18904 output_operand_lossage ("obsolete Maverick format code '%c'", code);
18905 return;
18907 case 'U':
18908 if (!REG_P (x)
18909 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
18910 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
18911 /* Bad value for wCG register number. */
18913 output_operand_lossage ("invalid operand for code '%c'", code);
18914 return;
18917 else
18918 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
18919 return;
18921 /* Print an iWMMXt control register name. */
18922 case 'w':
18923 if (!CONST_INT_P (x)
18924 || INTVAL (x) < 0
18925 || INTVAL (x) >= 16)
18926 /* Bad value for wC register number. */
18928 output_operand_lossage ("invalid operand for code '%c'", code);
18929 return;
18932 else
18934 static const char * wc_reg_names [16] =
18936 "wCID", "wCon", "wCSSF", "wCASF",
18937 "wC4", "wC5", "wC6", "wC7",
18938 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18939 "wC12", "wC13", "wC14", "wC15"
18942 fputs (wc_reg_names [INTVAL (x)], stream);
18944 return;
18946 /* Print the high single-precision register of a VFP double-precision
18947 register. */
18948 case 'p':
18950 int mode = GET_MODE (x);
18951 int regno;
18953 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18955 output_operand_lossage ("invalid operand for code '%c'", code);
18956 return;
18959 regno = REGNO (x);
18960 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18962 output_operand_lossage ("invalid operand for code '%c'", code);
18963 return;
18966 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
18968 return;
18970 /* Print a VFP/Neon double precision or quad precision register name. */
18971 case 'P':
18972 case 'q':
18974 int mode = GET_MODE (x);
18975 int is_quad = (code == 'q');
18976 int regno;
18978 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18980 output_operand_lossage ("invalid operand for code '%c'", code);
18981 return;
18984 if (!REG_P (x)
18985 || !IS_VFP_REGNUM (REGNO (x)))
18987 output_operand_lossage ("invalid operand for code '%c'", code);
18988 return;
18991 regno = REGNO (x);
18992 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18993 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18995 output_operand_lossage ("invalid operand for code '%c'", code);
18996 return;
18999 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
19000 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
19002 return;
19004 /* These two codes print the low/high doubleword register of a Neon quad
19005 register, respectively. For pair-structure types, can also print
19006 low/high quadword registers. */
19007 case 'e':
19008 case 'f':
19010 int mode = GET_MODE (x);
19011 int regno;
19013 if ((GET_MODE_SIZE (mode) != 16
19014 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
19016 output_operand_lossage ("invalid operand for code '%c'", code);
19017 return;
19020 regno = REGNO (x);
19021 if (!NEON_REGNO_OK_FOR_QUAD (regno))
19023 output_operand_lossage ("invalid operand for code '%c'", code);
19024 return;
19027 if (GET_MODE_SIZE (mode) == 16)
19028 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
19029 + (code == 'f' ? 1 : 0));
19030 else
19031 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
19032 + (code == 'f' ? 1 : 0));
19034 return;
19036 /* Print a VFPv3 floating-point constant, represented as an integer
19037 index. */
19038 case 'G':
19040 int index = vfp3_const_double_index (x);
19041 gcc_assert (index != -1);
19042 fprintf (stream, "%d", index);
19044 return;
19046 /* Print bits representing opcode features for Neon.
19048 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
19049 and polynomials as unsigned.
19051 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
19053 Bit 2 is 1 for rounding functions, 0 otherwise. */
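/* For example (illustrative only): with an operand value of 1 (signed
   integer, no rounding) '%T' prints 's', while a value of 3 (signed float)
   prints 'f'; '%O' emits an "r" only when bit 2 is set.  */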
19055 /* Identify the type as 's', 'u', 'p' or 'f'. */
19056 case 'T':
19058 HOST_WIDE_INT bits = INTVAL (x);
19059 fputc ("uspf"[bits & 3], stream);
19061 return;
19063 /* Likewise, but signed and unsigned integers are both 'i'. */
19064 case 'F':
19066 HOST_WIDE_INT bits = INTVAL (x);
19067 fputc ("iipf"[bits & 3], stream);
19069 return;
19071 /* As for 'T', but emit 'u' instead of 'p'. */
19072 case 't':
19074 HOST_WIDE_INT bits = INTVAL (x);
19075 fputc ("usuf"[bits & 3], stream);
19077 return;
19079 /* Bit 2: rounding (vs none). */
19080 case 'O':
19082 HOST_WIDE_INT bits = INTVAL (x);
19083 fputs ((bits & 4) != 0 ? "r" : "", stream);
19085 return;
19087 /* Memory operand for vld1/vst1 instruction. */
19088 case 'A':
19090 rtx addr;
19091 bool postinc = FALSE;
19092 unsigned align, memsize, align_bits;
19094 gcc_assert (MEM_P (x));
19095 addr = XEXP (x, 0);
19096 if (GET_CODE (addr) == POST_INC)
19098 postinc = 1;
19099 addr = XEXP (addr, 0);
19101 asm_fprintf (stream, "[%r", REGNO (addr));
19103 /* We know the alignment of this access, so we can emit a hint in the
19104 instruction (for some alignments) as an aid to the memory subsystem
19105 of the target. */
19106 align = MEM_ALIGN (x) >> 3;
19107 memsize = MEM_SIZE (x);
19109 /* Only certain alignment specifiers are supported by the hardware. */
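/* E.g. (illustrative only) a 16-byte access known to be 16-byte aligned is
   printed as "[rN:128]", giving the hardware a 128-bit alignment hint.  */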
19110 if (memsize == 32 && (align % 32) == 0)
19111 align_bits = 256;
19112 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
19113 align_bits = 128;
19114 else if (memsize >= 8 && (align % 8) == 0)
19115 align_bits = 64;
19116 else
19117 align_bits = 0;
19119 if (align_bits != 0)
19120 asm_fprintf (stream, ":%d", align_bits);
19122 asm_fprintf (stream, "]");
19124 if (postinc)
19125 fputs("!", stream);
19127 return;
19129 case 'C':
19131 rtx addr;
19133 gcc_assert (MEM_P (x));
19134 addr = XEXP (x, 0);
19135 gcc_assert (REG_P (addr));
19136 asm_fprintf (stream, "[%r]", REGNO (addr));
19138 return;
19140 /* Translate an S register number into a D register number and element index. */
19141 case 'y':
19143 int mode = GET_MODE (x);
19144 int regno;
19146 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
19148 output_operand_lossage ("invalid operand for code '%c'", code);
19149 return;
19152 regno = REGNO (x);
19153 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19155 output_operand_lossage ("invalid operand for code '%c'", code);
19156 return;
19159 regno = regno - FIRST_VFP_REGNUM;
19160 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
19162 return;
19164 case 'v':
19165 gcc_assert (CONST_DOUBLE_P (x));
19166 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
19167 return;
19169 /* Register specifier for vld1.16/vst1.16. Translate the S register
19170 number into a D register number and element index. */
19171 case 'z':
19173 int mode = GET_MODE (x);
19174 int regno;
19176 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
19178 output_operand_lossage ("invalid operand for code '%c'", code);
19179 return;
19182 regno = REGNO (x);
19183 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19185 output_operand_lossage ("invalid operand for code '%c'", code);
19186 return;
19189 regno = regno - FIRST_VFP_REGNUM;
19190 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
19192 return;
19194 default:
19195 if (x == 0)
19197 output_operand_lossage ("missing operand");
19198 return;
19201 switch (GET_CODE (x))
19203 case REG:
19204 asm_fprintf (stream, "%r", REGNO (x));
19205 break;
19207 case MEM:
19208 output_memory_reference_mode = GET_MODE (x);
19209 output_address (XEXP (x, 0));
19210 break;
19212 case CONST_DOUBLE:
19213 if (TARGET_NEON)
19215 char fpstr[20];
19216 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
19217 sizeof (fpstr), 0, 1);
19218 fprintf (stream, "#%s", fpstr);
19220 else
19221 fprintf (stream, "#%s", fp_immediate_constant (x));
19222 break;
19224 default:
19225 gcc_assert (GET_CODE (x) != NEG);
19226 fputc ('#', stream);
19227 if (GET_CODE (x) == HIGH)
19229 fputs (":lower16:", stream);
19230 x = XEXP (x, 0);
19233 output_addr_const (stream, x);
19234 break;
19239 /* Target hook for printing a memory address. */
19240 static void
19241 arm_print_operand_address (FILE *stream, rtx x)
19243 if (TARGET_32BIT)
19245 int is_minus = GET_CODE (x) == MINUS;
19247 if (REG_P (x))
19248 asm_fprintf (stream, "[%r]", REGNO (x));
19249 else if (GET_CODE (x) == PLUS || is_minus)
19251 rtx base = XEXP (x, 0);
19252 rtx index = XEXP (x, 1);
19253 HOST_WIDE_INT offset = 0;
19254 if (!REG_P (base)
19255 || (REG_P (index) && REGNO (index) == SP_REGNUM))
19257 /* Ensure that BASE is a register. */
19258 /* (one of them must be). */
19259 /* Also ensure the SP is not used as an index register. */
19260 rtx temp = base;
19261 base = index;
19262 index = temp;
19264 switch (GET_CODE (index))
19266 case CONST_INT:
19267 offset = INTVAL (index);
19268 if (is_minus)
19269 offset = -offset;
19270 asm_fprintf (stream, "[%r, #%wd]",
19271 REGNO (base), offset);
19272 break;
19274 case REG:
19275 asm_fprintf (stream, "[%r, %s%r]",
19276 REGNO (base), is_minus ? "-" : "",
19277 REGNO (index));
19278 break;
19280 case MULT:
19281 case ASHIFTRT:
19282 case LSHIFTRT:
19283 case ASHIFT:
19284 case ROTATERT:
19286 asm_fprintf (stream, "[%r, %s%r",
19287 REGNO (base), is_minus ? "-" : "",
19288 REGNO (XEXP (index, 0)));
19289 arm_print_operand (stream, index, 'S');
19290 fputs ("]", stream);
19291 break;
19294 default:
19295 gcc_unreachable ();
19298 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
19299 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
19301 extern enum machine_mode output_memory_reference_mode;
19303 gcc_assert (REG_P (XEXP (x, 0)));
19305 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
19306 asm_fprintf (stream, "[%r, #%s%d]!",
19307 REGNO (XEXP (x, 0)),
19308 GET_CODE (x) == PRE_DEC ? "-" : "",
19309 GET_MODE_SIZE (output_memory_reference_mode));
19310 else
19311 asm_fprintf (stream, "[%r], #%s%d",
19312 REGNO (XEXP (x, 0)),
19313 GET_CODE (x) == POST_DEC ? "-" : "",
19314 GET_MODE_SIZE (output_memory_reference_mode));
19316 else if (GET_CODE (x) == PRE_MODIFY)
19318 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
19319 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19320 asm_fprintf (stream, "#%wd]!",
19321 INTVAL (XEXP (XEXP (x, 1), 1)));
19322 else
19323 asm_fprintf (stream, "%r]!",
19324 REGNO (XEXP (XEXP (x, 1), 1)));
19326 else if (GET_CODE (x) == POST_MODIFY)
19328 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
19329 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19330 asm_fprintf (stream, "#%wd",
19331 INTVAL (XEXP (XEXP (x, 1), 1)));
19332 else
19333 asm_fprintf (stream, "%r",
19334 REGNO (XEXP (XEXP (x, 1), 1)));
19336 else output_addr_const (stream, x);
19338 else
19340 if (REG_P (x))
19341 asm_fprintf (stream, "[%r]", REGNO (x));
19342 else if (GET_CODE (x) == POST_INC)
19343 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
19344 else if (GET_CODE (x) == PLUS)
19346 gcc_assert (REG_P (XEXP (x, 0)));
19347 if (CONST_INT_P (XEXP (x, 1)))
19348 asm_fprintf (stream, "[%r, #%wd]",
19349 REGNO (XEXP (x, 0)),
19350 INTVAL (XEXP (x, 1)));
19351 else
19352 asm_fprintf (stream, "[%r, %r]",
19353 REGNO (XEXP (x, 0)),
19354 REGNO (XEXP (x, 1)));
19356 else
19357 output_addr_const (stream, x);
19361 /* Target hook for indicating whether a punctuation character for
19362 TARGET_PRINT_OPERAND is valid. */
19363 static bool
19364 arm_print_operand_punct_valid_p (unsigned char code)
19366 return (code == '@' || code == '|' || code == '.'
19367 || code == '(' || code == ')' || code == '#'
19368 || (TARGET_32BIT && (code == '?'))
19369 || (TARGET_THUMB2 && (code == '!'))
19370 || (TARGET_THUMB && (code == '_')));
19373 /* Target hook for assembling integer objects. The ARM version needs to
19374 handle word-sized values specially. */
19375 static bool
19376 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
19378 enum machine_mode mode;
19380 if (size == UNITS_PER_WORD && aligned_p)
19382 fputs ("\t.word\t", asm_out_file);
19383 output_addr_const (asm_out_file, x);
19385 /* Mark symbols as position independent. We only do this in the
19386 .text segment, not in the .data segment. */
19387 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
19388 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
19390 /* See legitimize_pic_address for an explanation of the
19391 TARGET_VXWORKS_RTP check. */
19392 if (TARGET_VXWORKS_RTP
19393 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
19394 fputs ("(GOT)", asm_out_file);
19395 else
19396 fputs ("(GOTOFF)", asm_out_file);
19398 fputc ('\n', asm_out_file);
19399 return true;
19402 mode = GET_MODE (x);
19404 if (arm_vector_mode_supported_p (mode))
19406 int i, units;
19408 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19410 units = CONST_VECTOR_NUNITS (x);
19411 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
19413 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19414 for (i = 0; i < units; i++)
19416 rtx elt = CONST_VECTOR_ELT (x, i);
19417 assemble_integer
19418 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
19420 else
19421 for (i = 0; i < units; i++)
19423 rtx elt = CONST_VECTOR_ELT (x, i);
19424 REAL_VALUE_TYPE rval;
19426 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
19428 assemble_real
19429 (rval, GET_MODE_INNER (mode),
19430 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
19433 return true;
19436 return default_assemble_integer (x, size, aligned_p);
19439 static void
19440 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
19442 section *s;
19444 if (!TARGET_AAPCS_BASED)
19446 (is_ctor ?
19447 default_named_section_asm_out_constructor
19448 : default_named_section_asm_out_destructor) (symbol, priority);
19449 return;
19452 /* Put these in the .init_array section, using a special relocation. */
19453 if (priority != DEFAULT_INIT_PRIORITY)
19455 char buf[18];
19456 sprintf (buf, "%s.%.5u",
19457 is_ctor ? ".init_array" : ".fini_array",
19458 priority);
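/* E.g. (illustrative only) a constructor with priority 101 ends up in a
   section named ".init_array.00101".  */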
19459 s = get_section (buf, SECTION_WRITE, NULL_TREE);
19461 else if (is_ctor)
19462 s = ctors_section;
19463 else
19464 s = dtors_section;
19466 switch_to_section (s);
19467 assemble_align (POINTER_SIZE);
19468 fputs ("\t.word\t", asm_out_file);
19469 output_addr_const (asm_out_file, symbol);
19470 fputs ("(target1)\n", asm_out_file);
19473 /* Add a function to the list of static constructors. */
19475 static void
19476 arm_elf_asm_constructor (rtx symbol, int priority)
19478 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
19481 /* Add a function to the list of static destructors. */
19483 static void
19484 arm_elf_asm_destructor (rtx symbol, int priority)
19486 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
19489 /* A finite state machine takes care of noticing whether or not instructions
19490 can be conditionally executed, and thus decreases execution time and code
19491 size by deleting branch instructions. The fsm is controlled by
19492 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
19494 /* The states of the fsm controlling condition codes are:
19495 0: normal, do nothing special
19496 1: make ASM_OUTPUT_OPCODE not output this instruction
19497 2: make ASM_OUTPUT_OPCODE not output this instruction
19498 3: make instructions conditional
19499 4: make instructions conditional
19501 State transitions (state->state by whom under condition):
19502 0 -> 1 final_prescan_insn if the `target' is a label
19503 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
19504 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
19505 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
19506 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
19507 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
19508 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
19509 (the target insn is arm_target_insn).
19511 If the jump clobbers the conditions then we use states 2 and 4.
19513 A similar thing can be done with conditional return insns.
19515 XXX In case the `target' is an unconditional branch, this conditionalising
19516 of the instructions always reduces code size, but not always execution
19517 time. But then, I want to reduce the code size to somewhere near what
19518 /bin/cc produces. */
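/* Editorial sketch (not from the original sources): schematically, the fsm
   performs the following transformation on ARM code:

       cmp     r0, #0                    cmp     r0, #0
       beq     .L1              ==>      addne   r1, r1, #1
       add     r1, r1, #1
     .L1:

   i.e. the conditional branch is deleted and the skipped instruction is
   executed under the inverse condition.  Register numbers and the label are
   illustrative.  */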
19520 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
19521 instructions. When a COND_EXEC instruction is seen the subsequent
19522 instructions are scanned so that multiple conditional instructions can be
19523 combined into a single IT block. arm_condexec_count and arm_condexec_mask
19524 specify the length and true/false mask for the IT block. These will be
19525 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
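/* Editorial sketch (assumed example, not from the original sources): for a
   Thumb-2 block of three single-op conditional insns where the second one
   uses the inverse condition, arm_condexec_masklen is 3 and
   arm_condexec_mask is 0b101, which thumb2_asm_output_opcode below turns
   into "itet <cond>": bit N set means insn N uses the base condition ('t'),
   clear means the inverse ('e').  */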
19527 /* Returns the index of the ARM condition code string in
19528 `arm_condition_codes', or ARM_NV if the comparison is invalid.
19529 COMPARISON should be an rtx like `(eq (...) (...))'. */
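/* Editorial example (illustrative; register names are not real): for a
   comparison such as (gt (reg:CC_SWP cc) (const_int 0)), i.e. one whose
   operands were swapped when the condition register was set, the CC_SWPmode
   case below maps GT to ARM_LT, while the plain CCmode case maps GT to
   ARM_GT.  */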
19531 enum arm_cond_code
19532 maybe_get_arm_condition_code (rtx comparison)
19534 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
19535 enum arm_cond_code code;
19536 enum rtx_code comp_code = GET_CODE (comparison);
19538 if (GET_MODE_CLASS (mode) != MODE_CC)
19539 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
19540 XEXP (comparison, 1));
19542 switch (mode)
19544 case CC_DNEmode: code = ARM_NE; goto dominance;
19545 case CC_DEQmode: code = ARM_EQ; goto dominance;
19546 case CC_DGEmode: code = ARM_GE; goto dominance;
19547 case CC_DGTmode: code = ARM_GT; goto dominance;
19548 case CC_DLEmode: code = ARM_LE; goto dominance;
19549 case CC_DLTmode: code = ARM_LT; goto dominance;
19550 case CC_DGEUmode: code = ARM_CS; goto dominance;
19551 case CC_DGTUmode: code = ARM_HI; goto dominance;
19552 case CC_DLEUmode: code = ARM_LS; goto dominance;
19553 case CC_DLTUmode: code = ARM_CC;
19555 dominance:
19556 if (comp_code == EQ)
19557 return ARM_INVERSE_CONDITION_CODE (code);
19558 if (comp_code == NE)
19559 return code;
19560 return ARM_NV;
19562 case CC_NOOVmode:
19563 switch (comp_code)
19565 case NE: return ARM_NE;
19566 case EQ: return ARM_EQ;
19567 case GE: return ARM_PL;
19568 case LT: return ARM_MI;
19569 default: return ARM_NV;
19572 case CC_Zmode:
19573 switch (comp_code)
19575 case NE: return ARM_NE;
19576 case EQ: return ARM_EQ;
19577 default: return ARM_NV;
19580 case CC_Nmode:
19581 switch (comp_code)
19583 case NE: return ARM_MI;
19584 case EQ: return ARM_PL;
19585 default: return ARM_NV;
19588 case CCFPEmode:
19589 case CCFPmode:
19590 /* We can handle all cases except UNEQ and LTGT. */
19591 switch (comp_code)
19593 case GE: return ARM_GE;
19594 case GT: return ARM_GT;
19595 case LE: return ARM_LS;
19596 case LT: return ARM_MI;
19597 case NE: return ARM_NE;
19598 case EQ: return ARM_EQ;
19599 case ORDERED: return ARM_VC;
19600 case UNORDERED: return ARM_VS;
19601 case UNLT: return ARM_LT;
19602 case UNLE: return ARM_LE;
19603 case UNGT: return ARM_HI;
19604 case UNGE: return ARM_PL;
19605 /* UNEQ and LTGT do not have a representation. */
19606 case UNEQ: /* Fall through. */
19607 case LTGT: /* Fall through. */
19608 default: return ARM_NV;
19611 case CC_SWPmode:
19612 switch (comp_code)
19614 case NE: return ARM_NE;
19615 case EQ: return ARM_EQ;
19616 case GE: return ARM_LE;
19617 case GT: return ARM_LT;
19618 case LE: return ARM_GE;
19619 case LT: return ARM_GT;
19620 case GEU: return ARM_LS;
19621 case GTU: return ARM_CC;
19622 case LEU: return ARM_CS;
19623 case LTU: return ARM_HI;
19624 default: return ARM_NV;
19627 case CC_Cmode:
19628 switch (comp_code)
19630 case LTU: return ARM_CS;
19631 case GEU: return ARM_CC;
19632 default: return ARM_NV;
19635 case CC_CZmode:
19636 switch (comp_code)
19638 case NE: return ARM_NE;
19639 case EQ: return ARM_EQ;
19640 case GEU: return ARM_CS;
19641 case GTU: return ARM_HI;
19642 case LEU: return ARM_LS;
19643 case LTU: return ARM_CC;
19644 default: return ARM_NV;
19647 case CC_NCVmode:
19648 switch (comp_code)
19650 case GE: return ARM_GE;
19651 case LT: return ARM_LT;
19652 case GEU: return ARM_CS;
19653 case LTU: return ARM_CC;
19654 default: return ARM_NV;
19657 case CCmode:
19658 switch (comp_code)
19660 case NE: return ARM_NE;
19661 case EQ: return ARM_EQ;
19662 case GE: return ARM_GE;
19663 case GT: return ARM_GT;
19664 case LE: return ARM_LE;
19665 case LT: return ARM_LT;
19666 case GEU: return ARM_CS;
19667 case GTU: return ARM_HI;
19668 case LEU: return ARM_LS;
19669 case LTU: return ARM_CC;
19670 default: return ARM_NV;
19673 default: gcc_unreachable ();
19677 /* Like maybe_get_arm_condition_code, but never returns ARM_NV. */
19678 static enum arm_cond_code
19679 get_arm_condition_code (rtx comparison)
19681 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
19682 gcc_assert (code != ARM_NV);
19683 return code;
19686 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
19687 instructions. */
19688 void
19689 thumb2_final_prescan_insn (rtx insn)
19691 rtx first_insn = insn;
19692 rtx body = PATTERN (insn);
19693 rtx predicate;
19694 enum arm_cond_code code;
19695 int n;
19696 int mask;
19697 int max;
19699 /* The maximum number of conditionally executed instructions in a block
19700 is the minimum of two values: the maximum allowed in an IT block
19701 and the maximum that is beneficial according to the cost model and tune. */
19702 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
19703 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
19705 /* Remove the previous insn from the count of insns to be output. */
19706 if (arm_condexec_count)
19707 arm_condexec_count--;
19709 /* Nothing to do if we are already inside a conditional block. */
19710 if (arm_condexec_count)
19711 return;
19713 if (GET_CODE (body) != COND_EXEC)
19714 return;
19716 /* Conditional jumps are implemented directly. */
19717 if (JUMP_P (insn))
19718 return;
19720 predicate = COND_EXEC_TEST (body);
19721 arm_current_cc = get_arm_condition_code (predicate);
19723 n = get_attr_ce_count (insn);
19724 arm_condexec_count = 1;
19725 arm_condexec_mask = (1 << n) - 1;
19726 arm_condexec_masklen = n;
19727 /* See if subsequent instructions can be combined into the same block. */
19728 for (;;)
19730 insn = next_nonnote_insn (insn);
19732 /* Jumping into the middle of an IT block is illegal, so a label or
19733 barrier terminates the block. */
19734 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
19735 break;
19737 body = PATTERN (insn);
19738 /* USE and CLOBBER aren't really insns, so just skip them. */
19739 if (GET_CODE (body) == USE
19740 || GET_CODE (body) == CLOBBER)
19741 continue;
19743 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
19744 if (GET_CODE (body) != COND_EXEC)
19745 break;
19746 /* Maximum number of conditionally executed instructions in a block. */
19747 n = get_attr_ce_count (insn);
19748 if (arm_condexec_masklen + n > max)
19749 break;
19751 predicate = COND_EXEC_TEST (body);
19752 code = get_arm_condition_code (predicate);
19753 mask = (1 << n) - 1;
19754 if (arm_current_cc == code)
19755 arm_condexec_mask |= (mask << arm_condexec_masklen);
19756 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
19757 break;
19759 arm_condexec_count++;
19760 arm_condexec_masklen += n;
19762 /* A jump must be the last instruction in a conditional block. */
19763 if (JUMP_P (insn))
19764 break;
19766 /* Restore recog_data (getting the attributes of other insns can
19767 destroy this array, but final.c assumes that it remains intact
19768 across this call). */
19769 extract_constrain_insn_cached (first_insn);
19772 void
19773 arm_final_prescan_insn (rtx insn)
19775 /* BODY will hold the body of INSN. */
19776 rtx body = PATTERN (insn);
19778 /* This will be 1 if trying to repeat the trick, and things need to be
19779 reversed if it appears to fail. */
19780 int reverse = 0;
19782 /* If we start with a return insn, we only succeed if we find another one. */
19783 int seeking_return = 0;
19784 enum rtx_code return_code = UNKNOWN;
19786 /* START_INSN will hold the insn from where we start looking. This is the
19787 first insn after the following code_label if REVERSE is true. */
19788 rtx start_insn = insn;
19790 /* If in state 4, check if the target branch is reached, in order to
19791 change back to state 0. */
19792 if (arm_ccfsm_state == 4)
19794 if (insn == arm_target_insn)
19796 arm_target_insn = NULL;
19797 arm_ccfsm_state = 0;
19799 return;
19802 /* If in state 3, it is possible to repeat the trick, if this insn is an
19803 unconditional branch to a label, and immediately following this branch
19804 is the previous target label which is only used once, and the label this
19805 branch jumps to is not too far off. */
19806 if (arm_ccfsm_state == 3)
19808 if (simplejump_p (insn))
19810 start_insn = next_nonnote_insn (start_insn);
19811 if (BARRIER_P (start_insn))
19813 /* XXX Isn't this always a barrier? */
19814 start_insn = next_nonnote_insn (start_insn);
19816 if (LABEL_P (start_insn)
19817 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19818 && LABEL_NUSES (start_insn) == 1)
19819 reverse = TRUE;
19820 else
19821 return;
19823 else if (ANY_RETURN_P (body))
19825 start_insn = next_nonnote_insn (start_insn);
19826 if (BARRIER_P (start_insn))
19827 start_insn = next_nonnote_insn (start_insn);
19828 if (LABEL_P (start_insn)
19829 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19830 && LABEL_NUSES (start_insn) == 1)
19832 reverse = TRUE;
19833 seeking_return = 1;
19834 return_code = GET_CODE (body);
19836 else
19837 return;
19839 else
19840 return;
19843 gcc_assert (!arm_ccfsm_state || reverse);
19844 if (!JUMP_P (insn))
19845 return;
19847 /* This jump might be paralleled with a clobber of the condition codes;
19848 the jump should always come first. */
19849 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
19850 body = XVECEXP (body, 0, 0);
19852 if (reverse
19853 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
19854 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
19856 int insns_skipped;
19857 int fail = FALSE, succeed = FALSE;
19858 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
19859 int then_not_else = TRUE;
19860 rtx this_insn = start_insn, label = 0;
19862 /* Register the insn jumped to. */
19863 if (reverse)
19865 if (!seeking_return)
19866 label = XEXP (SET_SRC (body), 0);
19868 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
19869 label = XEXP (XEXP (SET_SRC (body), 1), 0);
19870 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
19872 label = XEXP (XEXP (SET_SRC (body), 2), 0);
19873 then_not_else = FALSE;
19875 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
19877 seeking_return = 1;
19878 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
19880 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
19882 seeking_return = 1;
19883 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
19884 then_not_else = FALSE;
19886 else
19887 gcc_unreachable ();
19889 /* See how many insns this branch skips, and what kind of insns. If all
19890 insns are okay, and the label or unconditional branch to the same
19891 label is not too far away, succeed. */
19892 for (insns_skipped = 0;
19893 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
19895 rtx scanbody;
19897 this_insn = next_nonnote_insn (this_insn);
19898 if (!this_insn)
19899 break;
19901 switch (GET_CODE (this_insn))
19903 case CODE_LABEL:
19904 /* Succeed if it is the target label, otherwise fail since
19905 control falls in from somewhere else. */
19906 if (this_insn == label)
19908 arm_ccfsm_state = 1;
19909 succeed = TRUE;
19911 else
19912 fail = TRUE;
19913 break;
19915 case BARRIER:
19916 /* Succeed if the following insn is the target label.
19917 Otherwise fail.
19918 If return insns are used then the last insn in a function
19919 will be a barrier. */
19920 this_insn = next_nonnote_insn (this_insn);
19921 if (this_insn && this_insn == label)
19923 arm_ccfsm_state = 1;
19924 succeed = TRUE;
19926 else
19927 fail = TRUE;
19928 break;
19930 case CALL_INSN:
19931 /* The AAPCS says that conditional calls should not be
19932 used since they make interworking inefficient (the
19933 linker can't transform BL<cond> into BLX). That's
19934 only a problem if the machine has BLX. */
19935 if (arm_arch5)
19937 fail = TRUE;
19938 break;
19941 /* Succeed if the following insn is the target label, or
19942 if the following two insns are a barrier and the
19943 target label. */
19944 this_insn = next_nonnote_insn (this_insn);
19945 if (this_insn && BARRIER_P (this_insn))
19946 this_insn = next_nonnote_insn (this_insn);
19948 if (this_insn && this_insn == label
19949 && insns_skipped < max_insns_skipped)
19951 arm_ccfsm_state = 1;
19952 succeed = TRUE;
19954 else
19955 fail = TRUE;
19956 break;
19958 case JUMP_INSN:
19959 /* If this is an unconditional branch to the same label, succeed.
19960 If it is to another label, do nothing. If it is conditional,
19961 fail. */
19962 /* XXX Probably, the tests for SET and the PC are
19963 unnecessary. */
19965 scanbody = PATTERN (this_insn);
19966 if (GET_CODE (scanbody) == SET
19967 && GET_CODE (SET_DEST (scanbody)) == PC)
19969 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19970 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19972 arm_ccfsm_state = 2;
19973 succeed = TRUE;
19975 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19976 fail = TRUE;
19978 /* Fail if a conditional return is undesirable (e.g. on a
19979 StrongARM), but still allow this if optimizing for size. */
19980 else if (GET_CODE (scanbody) == return_code
19981 && !use_return_insn (TRUE, NULL)
19982 && !optimize_size)
19983 fail = TRUE;
19984 else if (GET_CODE (scanbody) == return_code)
19986 arm_ccfsm_state = 2;
19987 succeed = TRUE;
19989 else if (GET_CODE (scanbody) == PARALLEL)
19991 switch (get_attr_conds (this_insn))
19993 case CONDS_NOCOND:
19994 break;
19995 default:
19996 fail = TRUE;
19997 break;
20000 else
20001 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
20003 break;
20005 case INSN:
20006 /* Instructions using or affecting the condition codes make it
20007 fail. */
20008 scanbody = PATTERN (this_insn);
20009 if (!(GET_CODE (scanbody) == SET
20010 || GET_CODE (scanbody) == PARALLEL)
20011 || get_attr_conds (this_insn) != CONDS_NOCOND)
20012 fail = TRUE;
20013 break;
20015 default:
20016 break;
20019 if (succeed)
20021 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
20022 arm_target_label = CODE_LABEL_NUMBER (label);
20023 else
20025 gcc_assert (seeking_return || arm_ccfsm_state == 2);
20027 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
20029 this_insn = next_nonnote_insn (this_insn);
20030 gcc_assert (!this_insn
20031 || (!BARRIER_P (this_insn)
20032 && !LABEL_P (this_insn)));
20034 if (!this_insn)
20036 /* Oh, dear! We ran off the end... give up. */
20037 extract_constrain_insn_cached (insn);
20038 arm_ccfsm_state = 0;
20039 arm_target_insn = NULL;
20040 return;
20042 arm_target_insn = this_insn;
20045 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
20046 what it was. */
20047 if (!reverse)
20048 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
20050 if (reverse || then_not_else)
20051 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
20054 /* Restore recog_data (getting the attributes of other insns can
20055 destroy this array, but final.c assumes that it remains intact
20056 across this call). */
20057 extract_constrain_insn_cached (insn);
20061 /* Output IT instructions. */
20062 void
20063 thumb2_asm_output_opcode (FILE * stream)
20065 char buff[5];
20066 int n;
20068 if (arm_condexec_mask)
20070 for (n = 0; n < arm_condexec_masklen; n++)
20071 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
20072 buff[n] = 0;
20073 asm_fprintf(stream, "i%s\t%s\n\t", buff,
20074 arm_condition_codes[arm_current_cc]);
20075 arm_condexec_mask = 0;
20079 /* Returns true if REGNO is a valid register
20080 for holding a quantity of type MODE. */
20082 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
20084 if (GET_MODE_CLASS (mode) == MODE_CC)
20085 return (regno == CC_REGNUM
20086 || (TARGET_HARD_FLOAT && TARGET_VFP
20087 && regno == VFPCC_REGNUM));
20089 if (TARGET_THUMB1)
20090 /* For the Thumb we only allow values bigger than SImode in
20091 registers 0 - 6, so that there is always a second low
20092 register available to hold the upper part of the value.
20093 We probably ought to ensure that the register is the
20094 start of an even numbered register pair. */
20095 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
20097 if (TARGET_HARD_FLOAT && TARGET_VFP
20098 && IS_VFP_REGNUM (regno))
20100 if (mode == SFmode || mode == SImode)
20101 return VFP_REGNO_OK_FOR_SINGLE (regno);
20103 if (mode == DFmode)
20104 return VFP_REGNO_OK_FOR_DOUBLE (regno);
20106 /* VFP registers can hold HFmode values, but there is no point in
20107 putting them there unless we have hardware conversion insns. */
20108 if (mode == HFmode)
20109 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
20111 if (TARGET_NEON)
20112 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
20113 || (VALID_NEON_QREG_MODE (mode)
20114 && NEON_REGNO_OK_FOR_QUAD (regno))
20115 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
20116 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
20117 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
20118 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
20119 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
20121 return FALSE;
20124 if (TARGET_REALLY_IWMMXT)
20126 if (IS_IWMMXT_GR_REGNUM (regno))
20127 return mode == SImode;
20129 if (IS_IWMMXT_REGNUM (regno))
20130 return VALID_IWMMXT_REG_MODE (mode);
20133 /* We allow almost any value to be stored in the general registers.
20134 Restrict doubleword quantities to even register pairs so that we can
20135 use ldrd. Do not allow very large Neon structure opaque modes in
20136 general registers; they would use too many. */
20137 if (regno <= LAST_ARM_REGNUM)
20138 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
20139 && ARM_NUM_REGS (mode) <= 4;
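/* Editorial example (illustrative): under TARGET_LDRD the test above rejects,
   say, DImode in r1 (an odd-numbered start register) but accepts it in r2/r3,
   so that ldrd/strd can be used; quantities needing more than four core
   registers, such as the large Neon struct modes mentioned above, are
   rejected outright.  */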
20141 if (regno == FRAME_POINTER_REGNUM
20142 || regno == ARG_POINTER_REGNUM)
20143 /* We only allow integers in the fake hard registers. */
20144 return GET_MODE_CLASS (mode) == MODE_INT;
20146 return FALSE;
20149 /* Implement MODES_TIEABLE_P. */
20151 bool
20152 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20154 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
20155 return true;
20157 /* We specifically want to allow elements of "structure" modes to
20158 be tieable to the structure. This more general condition allows
20159 other rarer situations too. */
20160 if (TARGET_NEON
20161 && (VALID_NEON_DREG_MODE (mode1)
20162 || VALID_NEON_QREG_MODE (mode1)
20163 || VALID_NEON_STRUCT_MODE (mode1))
20164 && (VALID_NEON_DREG_MODE (mode2)
20165 || VALID_NEON_QREG_MODE (mode2)
20166 || VALID_NEON_STRUCT_MODE (mode2)))
20167 return true;
20169 return false;
20172 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
20173 not used in ARM mode. */
20175 enum reg_class
20176 arm_regno_class (int regno)
20178 if (TARGET_THUMB1)
20180 if (regno == STACK_POINTER_REGNUM)
20181 return STACK_REG;
20182 if (regno == CC_REGNUM)
20183 return CC_REG;
20184 if (regno < 8)
20185 return LO_REGS;
20186 return HI_REGS;
20189 if (TARGET_THUMB2 && regno < 8)
20190 return LO_REGS;
20192 if ( regno <= LAST_ARM_REGNUM
20193 || regno == FRAME_POINTER_REGNUM
20194 || regno == ARG_POINTER_REGNUM)
20195 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
20197 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
20198 return TARGET_THUMB2 ? CC_REG : NO_REGS;
20200 if (IS_VFP_REGNUM (regno))
20202 if (regno <= D7_VFP_REGNUM)
20203 return VFP_D0_D7_REGS;
20204 else if (regno <= LAST_LO_VFP_REGNUM)
20205 return VFP_LO_REGS;
20206 else
20207 return VFP_HI_REGS;
20210 if (IS_IWMMXT_REGNUM (regno))
20211 return IWMMXT_REGS;
20213 if (IS_IWMMXT_GR_REGNUM (regno))
20214 return IWMMXT_GR_REGS;
20216 return NO_REGS;
20219 /* Handle a special case when computing the offset
20220 of an argument from the frame pointer. */
20222 arm_debugger_arg_offset (int value, rtx addr)
20224 rtx insn;
20226 /* We are only interested if dbxout_parms() failed to compute the offset. */
20227 if (value != 0)
20228 return 0;
20230 /* We can only cope with the case where the address is held in a register. */
20231 if (!REG_P (addr))
20232 return 0;
20234 /* If we are using the frame pointer to point at the argument, then
20235 an offset of 0 is correct. */
20236 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
20237 return 0;
20239 /* If we are using the stack pointer to point at the
20240 argument, then an offset of 0 is correct. */
20241 /* ??? Check this is consistent with thumb2 frame layout. */
20242 if ((TARGET_THUMB || !frame_pointer_needed)
20243 && REGNO (addr) == SP_REGNUM)
20244 return 0;
20246 /* Oh dear. The argument is pointed to by a register rather
20247 than being held in a register, or being stored at a known
20248 offset from the frame pointer. Since GDB only understands
20249 those two kinds of argument we must translate the address
20250 held in the register into an offset from the frame pointer.
20251 We do this by searching through the insns for the function
20252 looking to see where this register gets its value. If the
20253 register is initialized from the frame pointer plus an offset
20254 then we are in luck and we can continue, otherwise we give up.
20256 This code is exercised by producing debugging information
20257 for a function with arguments like this:
20259 double func (double a, double b, int c, double d) {return d;}
20261 Without this code the stab for parameter 'd' will be set to
20262 an offset of 0 from the frame pointer, rather than 8. */
20264 /* The if() statement says:
20266 If the insn is a normal instruction
20267 and if the insn is setting the value in a register
20268 and if the register being set is the register holding the address of the argument
20269 and if the address is computed by an addition
20270 that involves adding to a register
20271 which is the frame pointer
20272 a constant integer
20274 then... */
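/* Editorial sketch (assumed example): the condition above matches an insn
   whose pattern has the shape

       (set (reg:SI Rn) (plus:SI (reg:SI fp) (const_int 8)))

   where Rn is the register holding the argument's address; INTVAL of the
   constant (here 8, matching the `double func' example above) becomes the
   offset returned below.  Rn and the offset are illustrative.  */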
20276 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20278 if ( NONJUMP_INSN_P (insn)
20279 && GET_CODE (PATTERN (insn)) == SET
20280 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
20281 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
20282 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
20283 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
20284 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
20287 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
20289 break;
20293 if (value == 0)
20295 debug_rtx (addr);
20296 warning (0, "unable to compute real location of stacked parameter");
20297 value = 8; /* XXX magic hack */
20300 return value;
20303 typedef enum {
20304 T_V8QI,
20305 T_V4HI,
20306 T_V4HF,
20307 T_V2SI,
20308 T_V2SF,
20309 T_DI,
20310 T_V16QI,
20311 T_V8HI,
20312 T_V4SI,
20313 T_V4SF,
20314 T_V2DI,
20315 T_TI,
20316 T_EI,
20317 T_OI,
20318 T_MAX /* Size of enum. Keep last. */
20319 } neon_builtin_type_mode;
20321 #define TYPE_MODE_BIT(X) (1 << (X))
20323 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
20324 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
20325 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
20326 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
20327 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
20328 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
20330 #define v8qi_UP T_V8QI
20331 #define v4hi_UP T_V4HI
20332 #define v4hf_UP T_V4HF
20333 #define v2si_UP T_V2SI
20334 #define v2sf_UP T_V2SF
20335 #define di_UP T_DI
20336 #define v16qi_UP T_V16QI
20337 #define v8hi_UP T_V8HI
20338 #define v4si_UP T_V4SI
20339 #define v4sf_UP T_V4SF
20340 #define v2di_UP T_V2DI
20341 #define ti_UP T_TI
20342 #define ei_UP T_EI
20343 #define oi_UP T_OI
20345 #define UP(X) X##_UP
20347 typedef enum {
20348 NEON_BINOP,
20349 NEON_TERNOP,
20350 NEON_UNOP,
20351 NEON_GETLANE,
20352 NEON_SETLANE,
20353 NEON_CREATE,
20354 NEON_RINT,
20355 NEON_DUP,
20356 NEON_DUPLANE,
20357 NEON_COMBINE,
20358 NEON_SPLIT,
20359 NEON_LANEMUL,
20360 NEON_LANEMULL,
20361 NEON_LANEMULH,
20362 NEON_LANEMAC,
20363 NEON_SCALARMUL,
20364 NEON_SCALARMULL,
20365 NEON_SCALARMULH,
20366 NEON_SCALARMAC,
20367 NEON_CONVERT,
20368 NEON_FLOAT_WIDEN,
20369 NEON_FLOAT_NARROW,
20370 NEON_FIXCONV,
20371 NEON_SELECT,
20372 NEON_RESULTPAIR,
20373 NEON_REINTERP,
20374 NEON_VTBL,
20375 NEON_VTBX,
20376 NEON_LOAD1,
20377 NEON_LOAD1LANE,
20378 NEON_STORE1,
20379 NEON_STORE1LANE,
20380 NEON_LOADSTRUCT,
20381 NEON_LOADSTRUCTLANE,
20382 NEON_STORESTRUCT,
20383 NEON_STORESTRUCTLANE,
20384 NEON_LOGICBINOP,
20385 NEON_SHIFTINSERT,
20386 NEON_SHIFTIMM,
20387 NEON_SHIFTACC
20388 } neon_itype;
20390 typedef struct {
20391 const char *name;
20392 const neon_itype itype;
20393 const neon_builtin_type_mode mode;
20394 const enum insn_code code;
20395 unsigned int fcode;
20396 } neon_builtin_datum;
20398 #define CF(N,X) CODE_FOR_neon_##N##X
20400 #define VAR1(T, N, A) \
20401 {#N, NEON_##T, UP (A), CF (N, A), 0}
20402 #define VAR2(T, N, A, B) \
20403 VAR1 (T, N, A), \
20404 {#N, NEON_##T, UP (B), CF (N, B), 0}
20405 #define VAR3(T, N, A, B, C) \
20406 VAR2 (T, N, A, B), \
20407 {#N, NEON_##T, UP (C), CF (N, C), 0}
20408 #define VAR4(T, N, A, B, C, D) \
20409 VAR3 (T, N, A, B, C), \
20410 {#N, NEON_##T, UP (D), CF (N, D), 0}
20411 #define VAR5(T, N, A, B, C, D, E) \
20412 VAR4 (T, N, A, B, C, D), \
20413 {#N, NEON_##T, UP (E), CF (N, E), 0}
20414 #define VAR6(T, N, A, B, C, D, E, F) \
20415 VAR5 (T, N, A, B, C, D, E), \
20416 {#N, NEON_##T, UP (F), CF (N, F), 0}
20417 #define VAR7(T, N, A, B, C, D, E, F, G) \
20418 VAR6 (T, N, A, B, C, D, E, F), \
20419 {#N, NEON_##T, UP (G), CF (N, G), 0}
20420 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20421 VAR7 (T, N, A, B, C, D, E, F, G), \
20422 {#N, NEON_##T, UP (H), CF (N, H), 0}
20423 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20424 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20425 {#N, NEON_##T, UP (I), CF (N, I), 0}
20426 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20427 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20428 {#N, NEON_##T, UP (J), CF (N, J), 0}
20430 /* The NEON builtin data can be found in arm_neon_builtins.def.
20431 The mode entries in the following table correspond to the "key" type of the
20432 instruction variant, i.e. equivalent to that which would be specified after
20433 the assembler mnemonic, which usually refers to the last vector operand.
20434 (Signed/unsigned/polynomial types are not differentiated between though, and
20435 are all mapped onto the same mode for a given element size.) The modes
20436 listed per instruction should be the same as those defined for that
20437 instruction's pattern in neon.md. */
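/* Editorial example (hypothetical entry, for illustration only): with the
   VARn macros defined above, a line such as

       VAR2 (BINOP, vadd, v8qi, v16qi)

   in arm_neon_builtins.def expands to the two table entries

       {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
       {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},

   and arm_init_neon_builtins later registers them under the names
   "__builtin_neon_vaddv8qi" and "__builtin_neon_vaddv16qi".  */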
20439 static neon_builtin_datum neon_builtin_data[] =
20441 #include "arm_neon_builtins.def"
20444 #undef CF
20445 #undef VAR1
20446 #undef VAR2
20447 #undef VAR3
20448 #undef VAR4
20449 #undef VAR5
20450 #undef VAR6
20451 #undef VAR7
20452 #undef VAR8
20453 #undef VAR9
20454 #undef VAR10
20456 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
20457 #define VAR1(T, N, A) \
20458 CF (N, A)
20459 #define VAR2(T, N, A, B) \
20460 VAR1 (T, N, A), \
20461 CF (N, B)
20462 #define VAR3(T, N, A, B, C) \
20463 VAR2 (T, N, A, B), \
20464 CF (N, C)
20465 #define VAR4(T, N, A, B, C, D) \
20466 VAR3 (T, N, A, B, C), \
20467 CF (N, D)
20468 #define VAR5(T, N, A, B, C, D, E) \
20469 VAR4 (T, N, A, B, C, D), \
20470 CF (N, E)
20471 #define VAR6(T, N, A, B, C, D, E, F) \
20472 VAR5 (T, N, A, B, C, D, E), \
20473 CF (N, F)
20474 #define VAR7(T, N, A, B, C, D, E, F, G) \
20475 VAR6 (T, N, A, B, C, D, E, F), \
20476 CF (N, G)
20477 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20478 VAR7 (T, N, A, B, C, D, E, F, G), \
20479 CF (N, H)
20480 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20481 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20482 CF (N, I)
20483 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20484 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20485 CF (N, J)
20486 enum arm_builtins
20488 ARM_BUILTIN_GETWCGR0,
20489 ARM_BUILTIN_GETWCGR1,
20490 ARM_BUILTIN_GETWCGR2,
20491 ARM_BUILTIN_GETWCGR3,
20493 ARM_BUILTIN_SETWCGR0,
20494 ARM_BUILTIN_SETWCGR1,
20495 ARM_BUILTIN_SETWCGR2,
20496 ARM_BUILTIN_SETWCGR3,
20498 ARM_BUILTIN_WZERO,
20500 ARM_BUILTIN_WAVG2BR,
20501 ARM_BUILTIN_WAVG2HR,
20502 ARM_BUILTIN_WAVG2B,
20503 ARM_BUILTIN_WAVG2H,
20505 ARM_BUILTIN_WACCB,
20506 ARM_BUILTIN_WACCH,
20507 ARM_BUILTIN_WACCW,
20509 ARM_BUILTIN_WMACS,
20510 ARM_BUILTIN_WMACSZ,
20511 ARM_BUILTIN_WMACU,
20512 ARM_BUILTIN_WMACUZ,
20514 ARM_BUILTIN_WSADB,
20515 ARM_BUILTIN_WSADBZ,
20516 ARM_BUILTIN_WSADH,
20517 ARM_BUILTIN_WSADHZ,
20519 ARM_BUILTIN_WALIGNI,
20520 ARM_BUILTIN_WALIGNR0,
20521 ARM_BUILTIN_WALIGNR1,
20522 ARM_BUILTIN_WALIGNR2,
20523 ARM_BUILTIN_WALIGNR3,
20525 ARM_BUILTIN_TMIA,
20526 ARM_BUILTIN_TMIAPH,
20527 ARM_BUILTIN_TMIABB,
20528 ARM_BUILTIN_TMIABT,
20529 ARM_BUILTIN_TMIATB,
20530 ARM_BUILTIN_TMIATT,
20532 ARM_BUILTIN_TMOVMSKB,
20533 ARM_BUILTIN_TMOVMSKH,
20534 ARM_BUILTIN_TMOVMSKW,
20536 ARM_BUILTIN_TBCSTB,
20537 ARM_BUILTIN_TBCSTH,
20538 ARM_BUILTIN_TBCSTW,
20540 ARM_BUILTIN_WMADDS,
20541 ARM_BUILTIN_WMADDU,
20543 ARM_BUILTIN_WPACKHSS,
20544 ARM_BUILTIN_WPACKWSS,
20545 ARM_BUILTIN_WPACKDSS,
20546 ARM_BUILTIN_WPACKHUS,
20547 ARM_BUILTIN_WPACKWUS,
20548 ARM_BUILTIN_WPACKDUS,
20550 ARM_BUILTIN_WADDB,
20551 ARM_BUILTIN_WADDH,
20552 ARM_BUILTIN_WADDW,
20553 ARM_BUILTIN_WADDSSB,
20554 ARM_BUILTIN_WADDSSH,
20555 ARM_BUILTIN_WADDSSW,
20556 ARM_BUILTIN_WADDUSB,
20557 ARM_BUILTIN_WADDUSH,
20558 ARM_BUILTIN_WADDUSW,
20559 ARM_BUILTIN_WSUBB,
20560 ARM_BUILTIN_WSUBH,
20561 ARM_BUILTIN_WSUBW,
20562 ARM_BUILTIN_WSUBSSB,
20563 ARM_BUILTIN_WSUBSSH,
20564 ARM_BUILTIN_WSUBSSW,
20565 ARM_BUILTIN_WSUBUSB,
20566 ARM_BUILTIN_WSUBUSH,
20567 ARM_BUILTIN_WSUBUSW,
20569 ARM_BUILTIN_WAND,
20570 ARM_BUILTIN_WANDN,
20571 ARM_BUILTIN_WOR,
20572 ARM_BUILTIN_WXOR,
20574 ARM_BUILTIN_WCMPEQB,
20575 ARM_BUILTIN_WCMPEQH,
20576 ARM_BUILTIN_WCMPEQW,
20577 ARM_BUILTIN_WCMPGTUB,
20578 ARM_BUILTIN_WCMPGTUH,
20579 ARM_BUILTIN_WCMPGTUW,
20580 ARM_BUILTIN_WCMPGTSB,
20581 ARM_BUILTIN_WCMPGTSH,
20582 ARM_BUILTIN_WCMPGTSW,
20584 ARM_BUILTIN_TEXTRMSB,
20585 ARM_BUILTIN_TEXTRMSH,
20586 ARM_BUILTIN_TEXTRMSW,
20587 ARM_BUILTIN_TEXTRMUB,
20588 ARM_BUILTIN_TEXTRMUH,
20589 ARM_BUILTIN_TEXTRMUW,
20590 ARM_BUILTIN_TINSRB,
20591 ARM_BUILTIN_TINSRH,
20592 ARM_BUILTIN_TINSRW,
20594 ARM_BUILTIN_WMAXSW,
20595 ARM_BUILTIN_WMAXSH,
20596 ARM_BUILTIN_WMAXSB,
20597 ARM_BUILTIN_WMAXUW,
20598 ARM_BUILTIN_WMAXUH,
20599 ARM_BUILTIN_WMAXUB,
20600 ARM_BUILTIN_WMINSW,
20601 ARM_BUILTIN_WMINSH,
20602 ARM_BUILTIN_WMINSB,
20603 ARM_BUILTIN_WMINUW,
20604 ARM_BUILTIN_WMINUH,
20605 ARM_BUILTIN_WMINUB,
20607 ARM_BUILTIN_WMULUM,
20608 ARM_BUILTIN_WMULSM,
20609 ARM_BUILTIN_WMULUL,
20611 ARM_BUILTIN_PSADBH,
20612 ARM_BUILTIN_WSHUFH,
20614 ARM_BUILTIN_WSLLH,
20615 ARM_BUILTIN_WSLLW,
20616 ARM_BUILTIN_WSLLD,
20617 ARM_BUILTIN_WSRAH,
20618 ARM_BUILTIN_WSRAW,
20619 ARM_BUILTIN_WSRAD,
20620 ARM_BUILTIN_WSRLH,
20621 ARM_BUILTIN_WSRLW,
20622 ARM_BUILTIN_WSRLD,
20623 ARM_BUILTIN_WRORH,
20624 ARM_BUILTIN_WRORW,
20625 ARM_BUILTIN_WRORD,
20626 ARM_BUILTIN_WSLLHI,
20627 ARM_BUILTIN_WSLLWI,
20628 ARM_BUILTIN_WSLLDI,
20629 ARM_BUILTIN_WSRAHI,
20630 ARM_BUILTIN_WSRAWI,
20631 ARM_BUILTIN_WSRADI,
20632 ARM_BUILTIN_WSRLHI,
20633 ARM_BUILTIN_WSRLWI,
20634 ARM_BUILTIN_WSRLDI,
20635 ARM_BUILTIN_WRORHI,
20636 ARM_BUILTIN_WRORWI,
20637 ARM_BUILTIN_WRORDI,
20639 ARM_BUILTIN_WUNPCKIHB,
20640 ARM_BUILTIN_WUNPCKIHH,
20641 ARM_BUILTIN_WUNPCKIHW,
20642 ARM_BUILTIN_WUNPCKILB,
20643 ARM_BUILTIN_WUNPCKILH,
20644 ARM_BUILTIN_WUNPCKILW,
20646 ARM_BUILTIN_WUNPCKEHSB,
20647 ARM_BUILTIN_WUNPCKEHSH,
20648 ARM_BUILTIN_WUNPCKEHSW,
20649 ARM_BUILTIN_WUNPCKEHUB,
20650 ARM_BUILTIN_WUNPCKEHUH,
20651 ARM_BUILTIN_WUNPCKEHUW,
20652 ARM_BUILTIN_WUNPCKELSB,
20653 ARM_BUILTIN_WUNPCKELSH,
20654 ARM_BUILTIN_WUNPCKELSW,
20655 ARM_BUILTIN_WUNPCKELUB,
20656 ARM_BUILTIN_WUNPCKELUH,
20657 ARM_BUILTIN_WUNPCKELUW,
20659 ARM_BUILTIN_WABSB,
20660 ARM_BUILTIN_WABSH,
20661 ARM_BUILTIN_WABSW,
20663 ARM_BUILTIN_WADDSUBHX,
20664 ARM_BUILTIN_WSUBADDHX,
20666 ARM_BUILTIN_WABSDIFFB,
20667 ARM_BUILTIN_WABSDIFFH,
20668 ARM_BUILTIN_WABSDIFFW,
20670 ARM_BUILTIN_WADDCH,
20671 ARM_BUILTIN_WADDCW,
20673 ARM_BUILTIN_WAVG4,
20674 ARM_BUILTIN_WAVG4R,
20676 ARM_BUILTIN_WMADDSX,
20677 ARM_BUILTIN_WMADDUX,
20679 ARM_BUILTIN_WMADDSN,
20680 ARM_BUILTIN_WMADDUN,
20682 ARM_BUILTIN_WMULWSM,
20683 ARM_BUILTIN_WMULWUM,
20685 ARM_BUILTIN_WMULWSMR,
20686 ARM_BUILTIN_WMULWUMR,
20688 ARM_BUILTIN_WMULWL,
20690 ARM_BUILTIN_WMULSMR,
20691 ARM_BUILTIN_WMULUMR,
20693 ARM_BUILTIN_WQMULM,
20694 ARM_BUILTIN_WQMULMR,
20696 ARM_BUILTIN_WQMULWM,
20697 ARM_BUILTIN_WQMULWMR,
20699 ARM_BUILTIN_WADDBHUSM,
20700 ARM_BUILTIN_WADDBHUSL,
20702 ARM_BUILTIN_WQMIABB,
20703 ARM_BUILTIN_WQMIABT,
20704 ARM_BUILTIN_WQMIATB,
20705 ARM_BUILTIN_WQMIATT,
20707 ARM_BUILTIN_WQMIABBN,
20708 ARM_BUILTIN_WQMIABTN,
20709 ARM_BUILTIN_WQMIATBN,
20710 ARM_BUILTIN_WQMIATTN,
20712 ARM_BUILTIN_WMIABB,
20713 ARM_BUILTIN_WMIABT,
20714 ARM_BUILTIN_WMIATB,
20715 ARM_BUILTIN_WMIATT,
20717 ARM_BUILTIN_WMIABBN,
20718 ARM_BUILTIN_WMIABTN,
20719 ARM_BUILTIN_WMIATBN,
20720 ARM_BUILTIN_WMIATTN,
20722 ARM_BUILTIN_WMIAWBB,
20723 ARM_BUILTIN_WMIAWBT,
20724 ARM_BUILTIN_WMIAWTB,
20725 ARM_BUILTIN_WMIAWTT,
20727 ARM_BUILTIN_WMIAWBBN,
20728 ARM_BUILTIN_WMIAWBTN,
20729 ARM_BUILTIN_WMIAWTBN,
20730 ARM_BUILTIN_WMIAWTTN,
20732 ARM_BUILTIN_WMERGE,
20734 ARM_BUILTIN_CRC32B,
20735 ARM_BUILTIN_CRC32H,
20736 ARM_BUILTIN_CRC32W,
20737 ARM_BUILTIN_CRC32CB,
20738 ARM_BUILTIN_CRC32CH,
20739 ARM_BUILTIN_CRC32CW,
20741 #undef CRYPTO1
20742 #undef CRYPTO2
20743 #undef CRYPTO3
20745 #define CRYPTO1(L, U, M1, M2) \
20746 ARM_BUILTIN_CRYPTO_##U,
20747 #define CRYPTO2(L, U, M1, M2, M3) \
20748 ARM_BUILTIN_CRYPTO_##U,
20749 #define CRYPTO3(L, U, M1, M2, M3, M4) \
20750 ARM_BUILTIN_CRYPTO_##U,
20752 #include "crypto.def"
20754 #undef CRYPTO1
20755 #undef CRYPTO2
20756 #undef CRYPTO3
20758 #include "arm_neon_builtins.def"
20760 ,ARM_BUILTIN_MAX
20763 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
20765 #undef CF
20766 #undef VAR1
20767 #undef VAR2
20768 #undef VAR3
20769 #undef VAR4
20770 #undef VAR5
20771 #undef VAR6
20772 #undef VAR7
20773 #undef VAR8
20774 #undef VAR9
20775 #undef VAR10
20777 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
20779 #define NUM_DREG_TYPES 5
20780 #define NUM_QREG_TYPES 6
20782 static void
20783 arm_init_neon_builtins (void)
20785 unsigned int i, fcode;
20786 tree decl;
20788 tree neon_intQI_type_node;
20789 tree neon_intHI_type_node;
20790 tree neon_floatHF_type_node;
20791 tree neon_polyQI_type_node;
20792 tree neon_polyHI_type_node;
20793 tree neon_intSI_type_node;
20794 tree neon_intDI_type_node;
20795 tree neon_intUTI_type_node;
20796 tree neon_float_type_node;
20798 tree intQI_pointer_node;
20799 tree intHI_pointer_node;
20800 tree intSI_pointer_node;
20801 tree intDI_pointer_node;
20802 tree float_pointer_node;
20804 tree const_intQI_node;
20805 tree const_intHI_node;
20806 tree const_intSI_node;
20807 tree const_intDI_node;
20808 tree const_float_node;
20810 tree const_intQI_pointer_node;
20811 tree const_intHI_pointer_node;
20812 tree const_intSI_pointer_node;
20813 tree const_intDI_pointer_node;
20814 tree const_float_pointer_node;
20816 tree V8QI_type_node;
20817 tree V4HI_type_node;
20818 tree V4HF_type_node;
20819 tree V2SI_type_node;
20820 tree V2SF_type_node;
20821 tree V16QI_type_node;
20822 tree V8HI_type_node;
20823 tree V4SI_type_node;
20824 tree V4SF_type_node;
20825 tree V2DI_type_node;
20827 tree intUQI_type_node;
20828 tree intUHI_type_node;
20829 tree intUSI_type_node;
20830 tree intUDI_type_node;
20832 tree intEI_type_node;
20833 tree intOI_type_node;
20834 tree intCI_type_node;
20835 tree intXI_type_node;
20837 tree V8QI_pointer_node;
20838 tree V4HI_pointer_node;
20839 tree V2SI_pointer_node;
20840 tree V2SF_pointer_node;
20841 tree V16QI_pointer_node;
20842 tree V8HI_pointer_node;
20843 tree V4SI_pointer_node;
20844 tree V4SF_pointer_node;
20845 tree V2DI_pointer_node;
20847 tree void_ftype_pv8qi_v8qi_v8qi;
20848 tree void_ftype_pv4hi_v4hi_v4hi;
20849 tree void_ftype_pv2si_v2si_v2si;
20850 tree void_ftype_pv2sf_v2sf_v2sf;
20851 tree void_ftype_pdi_di_di;
20852 tree void_ftype_pv16qi_v16qi_v16qi;
20853 tree void_ftype_pv8hi_v8hi_v8hi;
20854 tree void_ftype_pv4si_v4si_v4si;
20855 tree void_ftype_pv4sf_v4sf_v4sf;
20856 tree void_ftype_pv2di_v2di_v2di;
20858 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
20859 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
20860 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
20862 /* Create distinguished type nodes for NEON vector element types,
20863 and pointers to values of such types, so we can detect them later. */
20864 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20865 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20866 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20867 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20868 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
20869 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
20870 neon_float_type_node = make_node (REAL_TYPE);
20871 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
20872 layout_type (neon_float_type_node);
20873 neon_floatHF_type_node = make_node (REAL_TYPE);
20874 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
20875 layout_type (neon_floatHF_type_node);
20877 /* Define typedefs which exactly correspond to the modes we are basing vector
20878 types on. If you change these names you'll need to change
20879 the table used by arm_mangle_type too. */
20880 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
20881 "__builtin_neon_qi");
20882 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
20883 "__builtin_neon_hi");
20884 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
20885 "__builtin_neon_hf");
20886 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
20887 "__builtin_neon_si");
20888 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
20889 "__builtin_neon_sf");
20890 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
20891 "__builtin_neon_di");
20892 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
20893 "__builtin_neon_poly8");
20894 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
20895 "__builtin_neon_poly16");
20897 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
20898 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
20899 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
20900 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
20901 float_pointer_node = build_pointer_type (neon_float_type_node);
20903 /* Next create constant-qualified versions of the above types. */
20904 const_intQI_node = build_qualified_type (neon_intQI_type_node,
20905 TYPE_QUAL_CONST);
20906 const_intHI_node = build_qualified_type (neon_intHI_type_node,
20907 TYPE_QUAL_CONST);
20908 const_intSI_node = build_qualified_type (neon_intSI_type_node,
20909 TYPE_QUAL_CONST);
20910 const_intDI_node = build_qualified_type (neon_intDI_type_node,
20911 TYPE_QUAL_CONST);
20912 const_float_node = build_qualified_type (neon_float_type_node,
20913 TYPE_QUAL_CONST);
20915 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
20916 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
20917 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
20918 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
20919 const_float_pointer_node = build_pointer_type (const_float_node);
20921 /* Now create vector types based on our NEON element types. */
20922 /* 64-bit vectors. */
20923 V8QI_type_node =
20924 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
20925 V4HI_type_node =
20926 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
20927 V4HF_type_node =
20928 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
20929 V2SI_type_node =
20930 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
20931 V2SF_type_node =
20932 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
20933 /* 128-bit vectors. */
20934 V16QI_type_node =
20935 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
20936 V8HI_type_node =
20937 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
20938 V4SI_type_node =
20939 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
20940 V4SF_type_node =
20941 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
20942 V2DI_type_node =
20943 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
20945 /* Unsigned integer types for various mode sizes. */
20946 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
20947 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
20948 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
20949 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
20950 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
20953 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
20954 "__builtin_neon_uqi");
20955 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
20956 "__builtin_neon_uhi");
20957 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
20958 "__builtin_neon_usi");
20959 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20960 "__builtin_neon_udi");
20961 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20962 "__builtin_neon_poly64");
20963 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
20964 "__builtin_neon_poly128");
20966 /* Opaque integer types for structures of vectors. */
20967 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
20968 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
20969 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
20970 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
20972 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
20973 "__builtin_neon_ti");
20974 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
20975 "__builtin_neon_ei");
20976 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
20977 "__builtin_neon_oi");
20978 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
20979 "__builtin_neon_ci");
20980 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
20981 "__builtin_neon_xi");
20983 /* Pointers to vector types. */
20984 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
20985 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
20986 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
20987 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
20988 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
20989 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
20990 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
20991 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
20992 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
20994 /* Operations which return results as pairs. */
20995 void_ftype_pv8qi_v8qi_v8qi =
20996 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
20997 V8QI_type_node, NULL);
20998 void_ftype_pv4hi_v4hi_v4hi =
20999 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
21000 V4HI_type_node, NULL);
21001 void_ftype_pv2si_v2si_v2si =
21002 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
21003 V2SI_type_node, NULL);
21004 void_ftype_pv2sf_v2sf_v2sf =
21005 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
21006 V2SF_type_node, NULL);
21007 void_ftype_pdi_di_di =
21008 build_function_type_list (void_type_node, intDI_pointer_node,
21009 neon_intDI_type_node, neon_intDI_type_node, NULL);
21010 void_ftype_pv16qi_v16qi_v16qi =
21011 build_function_type_list (void_type_node, V16QI_pointer_node,
21012 V16QI_type_node, V16QI_type_node, NULL);
21013 void_ftype_pv8hi_v8hi_v8hi =
21014 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
21015 V8HI_type_node, NULL);
21016 void_ftype_pv4si_v4si_v4si =
21017 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
21018 V4SI_type_node, NULL);
21019 void_ftype_pv4sf_v4sf_v4sf =
21020 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
21021 V4SF_type_node, NULL);
21022 void_ftype_pv2di_v2di_v2di =
21023 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
21024 V2DI_type_node, NULL);
21026 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
21028 tree V4USI_type_node =
21029 build_vector_type_for_mode (intUSI_type_node, V4SImode);
21031 tree V16UQI_type_node =
21032 build_vector_type_for_mode (intUQI_type_node, V16QImode);
21034 tree v16uqi_ftype_v16uqi
21035 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
21037 tree v16uqi_ftype_v16uqi_v16uqi
21038 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
21039 V16UQI_type_node, NULL_TREE);
21041 tree v4usi_ftype_v4usi
21042 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
21044 tree v4usi_ftype_v4usi_v4usi
21045 = build_function_type_list (V4USI_type_node, V4USI_type_node,
21046 V4USI_type_node, NULL_TREE);
21048 tree v4usi_ftype_v4usi_v4usi_v4usi
21049 = build_function_type_list (V4USI_type_node, V4USI_type_node,
21050 V4USI_type_node, V4USI_type_node, NULL_TREE);
21052 tree uti_ftype_udi_udi
21053 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
21054 intUDI_type_node, NULL_TREE);
21056 #undef CRYPTO1
21057 #undef CRYPTO2
21058 #undef CRYPTO3
21059 #undef C
21060 #undef N
21061 #undef CF
21062 #undef FT1
21063 #undef FT2
21064 #undef FT3
21066 #define C(U) \
21067 ARM_BUILTIN_CRYPTO_##U
21068 #define N(L) \
21069 "__builtin_arm_crypto_"#L
21070 #define FT1(R, A) \
21071 R##_ftype_##A
21072 #define FT2(R, A1, A2) \
21073 R##_ftype_##A1##_##A2
21074 #define FT3(R, A1, A2, A3) \
21075 R##_ftype_##A1##_##A2##_##A3
21076 #define CRYPTO1(L, U, R, A) \
21077 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
21078 C (U), BUILT_IN_MD, \
21079 NULL, NULL_TREE);
21080 #define CRYPTO2(L, U, R, A1, A2) \
21081 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
21082 C (U), BUILT_IN_MD, \
21083 NULL, NULL_TREE);
21085 #define CRYPTO3(L, U, R, A1, A2, A3) \
21086 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
21087 C (U), BUILT_IN_MD, \
21088 NULL, NULL_TREE);
21089 #include "crypto.def"
21091 #undef CRYPTO1
21092 #undef CRYPTO2
21093 #undef CRYPTO3
21094 #undef C
21095 #undef N
21096 #undef FT1
21097 #undef FT2
21098 #undef FT3
21100 dreg_types[0] = V8QI_type_node;
21101 dreg_types[1] = V4HI_type_node;
21102 dreg_types[2] = V2SI_type_node;
21103 dreg_types[3] = V2SF_type_node;
21104 dreg_types[4] = neon_intDI_type_node;
21106 qreg_types[0] = V16QI_type_node;
21107 qreg_types[1] = V8HI_type_node;
21108 qreg_types[2] = V4SI_type_node;
21109 qreg_types[3] = V4SF_type_node;
21110 qreg_types[4] = V2DI_type_node;
21111 qreg_types[5] = neon_intUTI_type_node;
21113 for (i = 0; i < NUM_QREG_TYPES; i++)
21115 int j;
21116 for (j = 0; j < NUM_QREG_TYPES; j++)
21118 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
21119 reinterp_ftype_dreg[i][j]
21120 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
21122 reinterp_ftype_qreg[i][j]
21123 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
21127 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
21128 i < ARRAY_SIZE (neon_builtin_data);
21129 i++, fcode++)
21131 neon_builtin_datum *d = &neon_builtin_data[i];
21133 const char* const modenames[] = {
21134 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
21135 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
21136 "ti", "ei", "oi"
21138 char namebuf[60];
21139 tree ftype = NULL;
21140 int is_load = 0, is_store = 0;
21142 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
21144 d->fcode = fcode;
21146 switch (d->itype)
21148 case NEON_LOAD1:
21149 case NEON_LOAD1LANE:
21150 case NEON_LOADSTRUCT:
21151 case NEON_LOADSTRUCTLANE:
21152 is_load = 1;
21153 /* Fall through. */
21154 case NEON_STORE1:
21155 case NEON_STORE1LANE:
21156 case NEON_STORESTRUCT:
21157 case NEON_STORESTRUCTLANE:
21158 if (!is_load)
21159 is_store = 1;
21160 /* Fall through. */
21161 case NEON_UNOP:
21162 case NEON_RINT:
21163 case NEON_BINOP:
21164 case NEON_LOGICBINOP:
21165 case NEON_SHIFTINSERT:
21166 case NEON_TERNOP:
21167 case NEON_GETLANE:
21168 case NEON_SETLANE:
21169 case NEON_CREATE:
21170 case NEON_DUP:
21171 case NEON_DUPLANE:
21172 case NEON_SHIFTIMM:
21173 case NEON_SHIFTACC:
21174 case NEON_COMBINE:
21175 case NEON_SPLIT:
21176 case NEON_CONVERT:
21177 case NEON_FIXCONV:
21178 case NEON_LANEMUL:
21179 case NEON_LANEMULL:
21180 case NEON_LANEMULH:
21181 case NEON_LANEMAC:
21182 case NEON_SCALARMUL:
21183 case NEON_SCALARMULL:
21184 case NEON_SCALARMULH:
21185 case NEON_SCALARMAC:
21186 case NEON_SELECT:
21187 case NEON_VTBL:
21188 case NEON_VTBX:
21190 int k;
21191 tree return_type = void_type_node, args = void_list_node;
21193 /* Build a function type directly from the insn_data for
21194 this builtin. The build_function_type() function takes
21195 care of removing duplicates for us. */
21196 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
21198 tree eltype;
21200 if (is_load && k == 1)
21202 /* Neon load patterns always have the memory
21203 operand in the operand 1 position. */
21204 gcc_assert (insn_data[d->code].operand[k].predicate
21205 == neon_struct_operand);
21207 switch (d->mode)
21209 case T_V8QI:
21210 case T_V16QI:
21211 eltype = const_intQI_pointer_node;
21212 break;
21214 case T_V4HI:
21215 case T_V8HI:
21216 eltype = const_intHI_pointer_node;
21217 break;
21219 case T_V2SI:
21220 case T_V4SI:
21221 eltype = const_intSI_pointer_node;
21222 break;
21224 case T_V2SF:
21225 case T_V4SF:
21226 eltype = const_float_pointer_node;
21227 break;
21229 case T_DI:
21230 case T_V2DI:
21231 eltype = const_intDI_pointer_node;
21232 break;
21234 default: gcc_unreachable ();
21237 else if (is_store && k == 0)
21239 /* Similarly, Neon store patterns use operand 0 as
21240 the memory location to store to. */
21241 gcc_assert (insn_data[d->code].operand[k].predicate
21242 == neon_struct_operand);
21244 switch (d->mode)
21246 case T_V8QI:
21247 case T_V16QI:
21248 eltype = intQI_pointer_node;
21249 break;
21251 case T_V4HI:
21252 case T_V8HI:
21253 eltype = intHI_pointer_node;
21254 break;
21256 case T_V2SI:
21257 case T_V4SI:
21258 eltype = intSI_pointer_node;
21259 break;
21261 case T_V2SF:
21262 case T_V4SF:
21263 eltype = float_pointer_node;
21264 break;
21266 case T_DI:
21267 case T_V2DI:
21268 eltype = intDI_pointer_node;
21269 break;
21271 default: gcc_unreachable ();
21274 else
21276 switch (insn_data[d->code].operand[k].mode)
21278 case VOIDmode: eltype = void_type_node; break;
21279 /* Scalars. */
21280 case QImode: eltype = neon_intQI_type_node; break;
21281 case HImode: eltype = neon_intHI_type_node; break;
21282 case SImode: eltype = neon_intSI_type_node; break;
21283 case SFmode: eltype = neon_float_type_node; break;
21284 case DImode: eltype = neon_intDI_type_node; break;
21285 case TImode: eltype = intTI_type_node; break;
21286 case EImode: eltype = intEI_type_node; break;
21287 case OImode: eltype = intOI_type_node; break;
21288 case CImode: eltype = intCI_type_node; break;
21289 case XImode: eltype = intXI_type_node; break;
21290 /* 64-bit vectors. */
21291 case V8QImode: eltype = V8QI_type_node; break;
21292 case V4HImode: eltype = V4HI_type_node; break;
21293 case V2SImode: eltype = V2SI_type_node; break;
21294 case V2SFmode: eltype = V2SF_type_node; break;
21295 /* 128-bit vectors. */
21296 case V16QImode: eltype = V16QI_type_node; break;
21297 case V8HImode: eltype = V8HI_type_node; break;
21298 case V4SImode: eltype = V4SI_type_node; break;
21299 case V4SFmode: eltype = V4SF_type_node; break;
21300 case V2DImode: eltype = V2DI_type_node; break;
21301 default: gcc_unreachable ();
21305 if (k == 0 && !is_store)
21306 return_type = eltype;
21307 else
21308 args = tree_cons (NULL_TREE, eltype, args);
21311 ftype = build_function_type (return_type, args);
21313 break;
21315 case NEON_RESULTPAIR:
21317 switch (insn_data[d->code].operand[1].mode)
21319 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
21320 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
21321 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
21322 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
21323 case DImode: ftype = void_ftype_pdi_di_di; break;
21324 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
21325 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
21326 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
21327 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
21328 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
21329 default: gcc_unreachable ();
21332 break;
21334 case NEON_REINTERP:
21336 /* We iterate over NUM_DREG_TYPES doubleword types,
21337 then NUM_QREG_TYPES quadword types.
21338 V4HF is not a type used in reinterpret, so we translate
21339 d->mode to the correct index in reinterp_ftype_dreg. */
21340 bool qreg_p
21341 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
21342 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
21343 % NUM_QREG_TYPES;
21344 switch (insn_data[d->code].operand[0].mode)
21346 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
21347 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
21348 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
21349 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
21350 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
21351 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
21352 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
21353 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
21354 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
21355 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
21356 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
21357 default: gcc_unreachable ();
21360 break;
21361 case NEON_FLOAT_WIDEN:
21363 tree eltype = NULL_TREE;
21364 tree return_type = NULL_TREE;
21366 switch (insn_data[d->code].operand[1].mode)
21368 case V4HFmode:
21369 eltype = V4HF_type_node;
21370 return_type = V4SF_type_node;
21371 break;
21372 default: gcc_unreachable ();
21374 ftype = build_function_type_list (return_type, eltype, NULL);
21375 break;
21377 case NEON_FLOAT_NARROW:
21379 tree eltype = NULL_TREE;
21380 tree return_type = NULL_TREE;
21382 switch (insn_data[d->code].operand[1].mode)
21384 case V4SFmode:
21385 eltype = V4SF_type_node;
21386 return_type = V4HF_type_node;
21387 break;
21388 default: gcc_unreachable ();
21390 ftype = build_function_type_list (return_type, eltype, NULL);
21391 break;
21393 default:
21394 gcc_unreachable ();
21397 gcc_assert (ftype != NULL);
21399 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
21401 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
21402 NULL_TREE);
21403 arm_builtin_decls[fcode] = decl;
21407 #undef NUM_DREG_TYPES
21408 #undef NUM_QREG_TYPES
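/* Illustrative note (not part of the original source): the sprintf above
   composes each NEON builtin name from the pattern name and a mode suffix
   taken from the modenames[] table defined earlier in this file.  Assuming
   the usual suffix strings ("v8qi", "v4hi", ...), an entry with
   d->name == "vadd" in V8QImode is registered under a name like
   __builtin_neon_vaddv8qi, which the arm_neon.h intrinsics are expected to
   wrap.  A minimal user-level sketch:

     #include <arm_neon.h>

     int8x8_t
     add8 (int8x8_t a, int8x8_t b)
     {
       return vadd_s8 (a, b);   /+ wraps one of the __builtin_neon_vadd* builtins +/
     }

   The exact suffixes and prototypes come from neon_builtin_data and the
   type nodes built above; this is only a sketch.  */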
21410 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
21411 do \
21413 if ((MASK) & insn_flags) \
21415 tree bdecl; \
21416 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
21417 BUILT_IN_MD, NULL, NULL_TREE); \
21418 arm_builtin_decls[CODE] = bdecl; \
21421 while (0)
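/* Illustrative expansion (added, not part of the original source): a later
   call such as

     iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);

   goes through def_mbuiltin and becomes, roughly,

     if (FL_IWMMXT & insn_flags)
       arm_builtin_decls[ARM_BUILTIN_WZERO]
         = add_builtin_function ("__builtin_arm_wzero", di_ftype_void,
                                 ARM_BUILTIN_WZERO, BUILT_IN_MD,
                                 NULL, NULL_TREE);

   so each builtin is only registered when the selected CPU actually
   provides the corresponding iWMMXt feature.  */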
21423 struct builtin_description
21425 const unsigned int mask;
21426 const enum insn_code icode;
21427 const char * const name;
21428 const enum arm_builtins code;
21429 const enum rtx_code comparison;
21430 const unsigned int flag;
21433 static const struct builtin_description bdesc_2arg[] =
21435 #define IWMMXT_BUILTIN(code, string, builtin) \
21436 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
21437 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21439 #define IWMMXT2_BUILTIN(code, string, builtin) \
21440 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
21441 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21443 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
21444 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
21445 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
21446 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
21447 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
21448 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
21449 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
21450 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
21451 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
21452 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
21453 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
21454 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
21455 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
21456 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
21457 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
21458 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
21459 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
21460 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
21461 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
21462 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
21463 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
21464 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
21465 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
21466 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
21467 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
21468 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
21469 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
21470 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
21471 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
21472 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
21473 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
21474 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
21475 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
21476 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
21477 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
21478 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
21479 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
21480 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
21481 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
21482 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
21483 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
21484 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
21485 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
21486 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
21487 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
21488 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
21489 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
21490 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
21491 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
21492 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
21493 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
21494 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
21495 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
21496 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
21497 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
21498 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
21499 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
21500 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
21501 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
21502 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
21503 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
21504 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
21505 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
21506 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
21507 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
21508 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
21509 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
21510 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
21511 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
21512 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
21513 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
21514 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
21515 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
21516 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
21517 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
21518 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
21519 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
21520 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
21522 #define IWMMXT_BUILTIN2(code, builtin) \
21523 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21525 #define IWMMXT2_BUILTIN2(code, builtin) \
21526 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21528 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
21529 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
21530 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
21531 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
21532 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
21533 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
21534 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
21535 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
21536 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
21537 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
21539 #define CRC32_BUILTIN(L, U) \
21540 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
21541 UNKNOWN, 0},
21542 CRC32_BUILTIN (crc32b, CRC32B)
21543 CRC32_BUILTIN (crc32h, CRC32H)
21544 CRC32_BUILTIN (crc32w, CRC32W)
21545 CRC32_BUILTIN (crc32cb, CRC32CB)
21546 CRC32_BUILTIN (crc32ch, CRC32CH)
21547 CRC32_BUILTIN (crc32cw, CRC32CW)
21548 #undef CRC32_BUILTIN
21551 #define CRYPTO_BUILTIN(L, U) \
21552 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
21553 UNKNOWN, 0},
21554 #undef CRYPTO1
21555 #undef CRYPTO2
21556 #undef CRYPTO3
21557 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
21558 #define CRYPTO1(L, U, R, A)
21559 #define CRYPTO3(L, U, R, A1, A2, A3)
21560 #include "crypto.def"
21561 #undef CRYPTO1
21562 #undef CRYPTO2
21563 #undef CRYPTO3
21567 static const struct builtin_description bdesc_1arg[] =
21569 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
21570 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
21571 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
21572 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
21573 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
21574 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
21575 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
21576 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
21577 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
21578 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
21579 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
21580 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
21581 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
21582 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
21583 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
21584 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
21585 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
21586 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
21587 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
21588 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
21589 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
21590 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
21591 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
21592 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
21594 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
21595 #define CRYPTO2(L, U, R, A1, A2)
21596 #define CRYPTO3(L, U, R, A1, A2, A3)
21597 #include "crypto.def"
21598 #undef CRYPTO1
21599 #undef CRYPTO2
21600 #undef CRYPTO3
21603 static const struct builtin_description bdesc_3arg[] =
21605 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
21606 #define CRYPTO1(L, U, R, A)
21607 #define CRYPTO2(L, U, R, A1, A2)
21608 #include "crypto.def"
21609 #undef CRYPTO1
21610 #undef CRYPTO2
21611 #undef CRYPTO3
21613 #undef CRYPTO_BUILTIN
21615 /* Set up all the iWMMXt builtins. This is not called if
21616 TARGET_IWMMXT is zero. */
21618 static void
21619 arm_init_iwmmxt_builtins (void)
21621 const struct builtin_description * d;
21622 size_t i;
21624 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21625 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21626 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
21628 tree v8qi_ftype_v8qi_v8qi_int
21629 = build_function_type_list (V8QI_type_node,
21630 V8QI_type_node, V8QI_type_node,
21631 integer_type_node, NULL_TREE);
21632 tree v4hi_ftype_v4hi_int
21633 = build_function_type_list (V4HI_type_node,
21634 V4HI_type_node, integer_type_node, NULL_TREE);
21635 tree v2si_ftype_v2si_int
21636 = build_function_type_list (V2SI_type_node,
21637 V2SI_type_node, integer_type_node, NULL_TREE);
21638 tree v2si_ftype_di_di
21639 = build_function_type_list (V2SI_type_node,
21640 long_long_integer_type_node,
21641 long_long_integer_type_node,
21642 NULL_TREE);
21643 tree di_ftype_di_int
21644 = build_function_type_list (long_long_integer_type_node,
21645 long_long_integer_type_node,
21646 integer_type_node, NULL_TREE);
21647 tree di_ftype_di_int_int
21648 = build_function_type_list (long_long_integer_type_node,
21649 long_long_integer_type_node,
21650 integer_type_node,
21651 integer_type_node, NULL_TREE);
21652 tree int_ftype_v8qi
21653 = build_function_type_list (integer_type_node,
21654 V8QI_type_node, NULL_TREE);
21655 tree int_ftype_v4hi
21656 = build_function_type_list (integer_type_node,
21657 V4HI_type_node, NULL_TREE);
21658 tree int_ftype_v2si
21659 = build_function_type_list (integer_type_node,
21660 V2SI_type_node, NULL_TREE);
21661 tree int_ftype_v8qi_int
21662 = build_function_type_list (integer_type_node,
21663 V8QI_type_node, integer_type_node, NULL_TREE);
21664 tree int_ftype_v4hi_int
21665 = build_function_type_list (integer_type_node,
21666 V4HI_type_node, integer_type_node, NULL_TREE);
21667 tree int_ftype_v2si_int
21668 = build_function_type_list (integer_type_node,
21669 V2SI_type_node, integer_type_node, NULL_TREE);
21670 tree v8qi_ftype_v8qi_int_int
21671 = build_function_type_list (V8QI_type_node,
21672 V8QI_type_node, integer_type_node,
21673 integer_type_node, NULL_TREE);
21674 tree v4hi_ftype_v4hi_int_int
21675 = build_function_type_list (V4HI_type_node,
21676 V4HI_type_node, integer_type_node,
21677 integer_type_node, NULL_TREE);
21678 tree v2si_ftype_v2si_int_int
21679 = build_function_type_list (V2SI_type_node,
21680 V2SI_type_node, integer_type_node,
21681 integer_type_node, NULL_TREE);
21682 /* Miscellaneous. */
21683 tree v8qi_ftype_v4hi_v4hi
21684 = build_function_type_list (V8QI_type_node,
21685 V4HI_type_node, V4HI_type_node, NULL_TREE);
21686 tree v4hi_ftype_v2si_v2si
21687 = build_function_type_list (V4HI_type_node,
21688 V2SI_type_node, V2SI_type_node, NULL_TREE);
21689 tree v8qi_ftype_v4hi_v8qi
21690 = build_function_type_list (V8QI_type_node,
21691 V4HI_type_node, V8QI_type_node, NULL_TREE);
21692 tree v2si_ftype_v4hi_v4hi
21693 = build_function_type_list (V2SI_type_node,
21694 V4HI_type_node, V4HI_type_node, NULL_TREE);
21695 tree v2si_ftype_v8qi_v8qi
21696 = build_function_type_list (V2SI_type_node,
21697 V8QI_type_node, V8QI_type_node, NULL_TREE);
21698 tree v4hi_ftype_v4hi_di
21699 = build_function_type_list (V4HI_type_node,
21700 V4HI_type_node, long_long_integer_type_node,
21701 NULL_TREE);
21702 tree v2si_ftype_v2si_di
21703 = build_function_type_list (V2SI_type_node,
21704 V2SI_type_node, long_long_integer_type_node,
21705 NULL_TREE);
21706 tree di_ftype_void
21707 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
21708 tree int_ftype_void
21709 = build_function_type_list (integer_type_node, NULL_TREE);
21710 tree di_ftype_v8qi
21711 = build_function_type_list (long_long_integer_type_node,
21712 V8QI_type_node, NULL_TREE);
21713 tree di_ftype_v4hi
21714 = build_function_type_list (long_long_integer_type_node,
21715 V4HI_type_node, NULL_TREE);
21716 tree di_ftype_v2si
21717 = build_function_type_list (long_long_integer_type_node,
21718 V2SI_type_node, NULL_TREE);
21719 tree v2si_ftype_v4hi
21720 = build_function_type_list (V2SI_type_node,
21721 V4HI_type_node, NULL_TREE);
21722 tree v4hi_ftype_v8qi
21723 = build_function_type_list (V4HI_type_node,
21724 V8QI_type_node, NULL_TREE);
21725 tree v8qi_ftype_v8qi
21726 = build_function_type_list (V8QI_type_node,
21727 V8QI_type_node, NULL_TREE);
21728 tree v4hi_ftype_v4hi
21729 = build_function_type_list (V4HI_type_node,
21730 V4HI_type_node, NULL_TREE);
21731 tree v2si_ftype_v2si
21732 = build_function_type_list (V2SI_type_node,
21733 V2SI_type_node, NULL_TREE);
21735 tree di_ftype_di_v4hi_v4hi
21736 = build_function_type_list (long_long_unsigned_type_node,
21737 long_long_unsigned_type_node,
21738 V4HI_type_node, V4HI_type_node,
21739 NULL_TREE);
21741 tree di_ftype_v4hi_v4hi
21742 = build_function_type_list (long_long_unsigned_type_node,
21743 V4HI_type_node, V4HI_type_node,
21744 NULL_TREE);
21746 tree v2si_ftype_v2si_v4hi_v4hi
21747 = build_function_type_list (V2SI_type_node,
21748 V2SI_type_node, V4HI_type_node,
21749 V4HI_type_node, NULL_TREE);
21751 tree v2si_ftype_v2si_v8qi_v8qi
21752 = build_function_type_list (V2SI_type_node,
21753 V2SI_type_node, V8QI_type_node,
21754 V8QI_type_node, NULL_TREE);
21756 tree di_ftype_di_v2si_v2si
21757 = build_function_type_list (long_long_unsigned_type_node,
21758 long_long_unsigned_type_node,
21759 V2SI_type_node, V2SI_type_node,
21760 NULL_TREE);
21762 tree di_ftype_di_di_int
21763 = build_function_type_list (long_long_unsigned_type_node,
21764 long_long_unsigned_type_node,
21765 long_long_unsigned_type_node,
21766 integer_type_node, NULL_TREE);
21768 tree void_ftype_int
21769 = build_function_type_list (void_type_node,
21770 integer_type_node, NULL_TREE);
21772 tree v8qi_ftype_char
21773 = build_function_type_list (V8QI_type_node,
21774 signed_char_type_node, NULL_TREE);
21776 tree v4hi_ftype_short
21777 = build_function_type_list (V4HI_type_node,
21778 short_integer_type_node, NULL_TREE);
21780 tree v2si_ftype_int
21781 = build_function_type_list (V2SI_type_node,
21782 integer_type_node, NULL_TREE);
21784 /* Normal vector binops. */
21785 tree v8qi_ftype_v8qi_v8qi
21786 = build_function_type_list (V8QI_type_node,
21787 V8QI_type_node, V8QI_type_node, NULL_TREE);
21788 tree v4hi_ftype_v4hi_v4hi
21789 = build_function_type_list (V4HI_type_node,
21790 V4HI_type_node, V4HI_type_node, NULL_TREE);
21791 tree v2si_ftype_v2si_v2si
21792 = build_function_type_list (V2SI_type_node,
21793 V2SI_type_node, V2SI_type_node, NULL_TREE);
21794 tree di_ftype_di_di
21795 = build_function_type_list (long_long_unsigned_type_node,
21796 long_long_unsigned_type_node,
21797 long_long_unsigned_type_node,
21798 NULL_TREE);
21800 /* Add all builtins that are more or less simple operations on two
21801 operands. */
21802 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21804 /* Use one of the operands; the target can have a different mode for
21805 mask-generating compares. */
21806 enum machine_mode mode;
21807 tree type;
21809 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
21810 continue;
21812 mode = insn_data[d->icode].operand[1].mode;
21814 switch (mode)
21816 case V8QImode:
21817 type = v8qi_ftype_v8qi_v8qi;
21818 break;
21819 case V4HImode:
21820 type = v4hi_ftype_v4hi_v4hi;
21821 break;
21822 case V2SImode:
21823 type = v2si_ftype_v2si_v2si;
21824 break;
21825 case DImode:
21826 type = di_ftype_di_di;
21827 break;
21829 default:
21830 gcc_unreachable ();
21833 def_mbuiltin (d->mask, d->name, type, d->code);
21836 /* Add the remaining MMX insns with somewhat more complicated types. */
21837 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
21838 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
21839 ARM_BUILTIN_ ## CODE)
21841 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
21842 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
21843 ARM_BUILTIN_ ## CODE)
21845 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
21846 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
21847 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
21848 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
21849 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
21850 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
21851 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
21852 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
21853 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
21855 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
21856 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
21857 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
21858 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
21859 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
21860 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
21862 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
21863 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
21864 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
21865 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
21866 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
21867 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
21869 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
21870 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
21871 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
21872 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
21873 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
21874 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
21876 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
21877 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
21878 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
21879 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
21880 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
21881 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
21883 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
21885 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
21886 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
21887 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
21888 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
21889 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
21890 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
21891 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
21892 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
21893 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
21894 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
21896 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
21897 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
21898 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
21899 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
21900 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
21901 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
21902 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
21903 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
21904 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
21906 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
21907 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
21908 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
21910 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
21911 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
21912 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
21914 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
21915 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
21917 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
21918 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
21919 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
21920 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
21921 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
21922 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
21924 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
21925 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
21926 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
21927 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
21928 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
21929 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
21930 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
21931 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
21932 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
21933 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
21934 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
21935 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
21937 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
21938 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
21939 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
21940 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
21942 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
21943 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
21944 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
21945 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
21946 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
21947 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
21948 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
21950 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
21951 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
21952 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
21954 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
21955 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
21956 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
21957 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
21959 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
21960 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
21961 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
21962 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
21964 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
21965 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
21966 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
21967 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
21969 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
21970 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
21971 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
21972 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
21974 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
21975 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
21976 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
21977 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
21979 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
21980 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
21981 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
21982 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
21984 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
21986 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
21987 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
21988 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
21990 #undef iwmmx_mbuiltin
21991 #undef iwmmx2_mbuiltin
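/* Hedged usage sketch (added, not from the original source): with an
   iWMMXt target selected (e.g. -mcpu=iwmmxt), the two-operand table above
   makes builtins such as __builtin_arm_waddb available on 64-bit vector
   modes.  Using GCC's generic vector extension for the operand type (the
   exact element signedness may need a cast in practice):

     typedef signed char v8qi __attribute__ ((vector_size (8)));

     v8qi
     add_bytes (v8qi a, v8qi b)
     {
       return __builtin_arm_waddb (a, b);   /+ "waddb" -> addv8qi3 above +/
     }
*/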
21994 static void
21995 arm_init_fp16_builtins (void)
21997 tree fp16_type = make_node (REAL_TYPE);
21998 TYPE_PRECISION (fp16_type) = 16;
21999 layout_type (fp16_type);
22000 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
22003 static void
22004 arm_init_crc32_builtins ()
22006 tree si_ftype_si_qi
22007 = build_function_type_list (unsigned_intSI_type_node,
22008 unsigned_intSI_type_node,
22009 unsigned_intQI_type_node, NULL_TREE);
22010 tree si_ftype_si_hi
22011 = build_function_type_list (unsigned_intSI_type_node,
22012 unsigned_intSI_type_node,
22013 unsigned_intHI_type_node, NULL_TREE);
22014 tree si_ftype_si_si
22015 = build_function_type_list (unsigned_intSI_type_node,
22016 unsigned_intSI_type_node,
22017 unsigned_intSI_type_node, NULL_TREE);
22019 arm_builtin_decls[ARM_BUILTIN_CRC32B]
22020 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
22021 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
22022 arm_builtin_decls[ARM_BUILTIN_CRC32H]
22023 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
22024 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
22025 arm_builtin_decls[ARM_BUILTIN_CRC32W]
22026 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
22027 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
22028 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
22029 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
22030 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
22031 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
22032 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
22033 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
22034 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
22035 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
22036 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
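/* Hedged usage sketch (added, not part of the original source): on a
   target with the CRC32 extension (e.g. -march=armv8-a+crc), the
   declarations above expose builtins with the unsigned prototypes built
   here, so a byte-wise CRC loop can call them directly:

     #include <stdint.h>

     uint32_t
     crc32_bytes (uint32_t crc, const uint8_t *p, unsigned n)
     {
       while (n--)
         crc = __builtin_arm_crc32b (crc, *p++);
       return crc;
     }

   __builtin_arm_crc32h, __builtin_arm_crc32w and the crc32c* variants
   follow the same shape with 16-bit and 32-bit data arguments.  */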
22039 static void
22040 arm_init_builtins (void)
22042 if (TARGET_REALLY_IWMMXT)
22043 arm_init_iwmmxt_builtins ();
22045 if (TARGET_NEON)
22046 arm_init_neon_builtins ();
22048 if (arm_fp16_format)
22049 arm_init_fp16_builtins ();
22051 if (TARGET_CRC32)
22052 arm_init_crc32_builtins ();
22055 /* Return the ARM builtin for CODE. */
22057 static tree
22058 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
22060 if (code >= ARM_BUILTIN_MAX)
22061 return error_mark_node;
22063 return arm_builtin_decls[code];
22066 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
22068 static const char *
22069 arm_invalid_parameter_type (const_tree t)
22071 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22072 return N_("function parameters cannot have __fp16 type");
22073 return NULL;
22076 /* Implement TARGET_INVALID_RETURN_TYPE. */
22078 static const char *
22079 arm_invalid_return_type (const_tree t)
22081 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22082 return N_("functions cannot return __fp16 type");
22083 return NULL;
22086 /* Implement TARGET_PROMOTED_TYPE. */
22088 static tree
22089 arm_promoted_type (const_tree t)
22091 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22092 return float_type_node;
22093 return NULL_TREE;
22096 /* Implement TARGET_CONVERT_TO_TYPE.
22097 Specifically, this hook implements the peculiarity of the ARM
22098 half-precision floating-point C semantics that requires conversions between
22099 __fp16 and double to go through an intermediate conversion to float. */
22101 static tree
22102 arm_convert_to_type (tree type, tree expr)
22104 tree fromtype = TREE_TYPE (expr);
22105 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
22106 return NULL_TREE;
22107 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
22108 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
22109 return convert (type, convert (float_type_node, expr));
22110 return NULL_TREE;
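/* Hedged sketch of the user-visible effect of the hooks above (added for
   illustration, not part of the original source); assumes
   -mfp16-format=ieee or =alternative so that arm_fp16_format is set and
   __fp16 is registered by arm_init_fp16_builtins:

     __fp16 h = 1.0;       /+ storage-only half-precision scalar +/
     float  f = h + 1.0f;  /+ arm_promoted_type: __fp16 arithmetic promotes
                              to float +/
     double d = h;         /+ arm_convert_to_type: __fp16 <-> double goes
                              through an intermediate conversion to float +/

   Declaring an __fp16 parameter or return value is rejected by
   arm_invalid_parameter_type / arm_invalid_return_type above.  */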
22113 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22114 This simply adds HFmode as a supported mode; even though we don't
22115 implement arithmetic on this type directly, it's supported by
22116 optabs conversions, much the way the double-word arithmetic is
22117 special-cased in the default hook. */
22119 static bool
22120 arm_scalar_mode_supported_p (enum machine_mode mode)
22122 if (mode == HFmode)
22123 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
22124 else if (ALL_FIXED_POINT_MODE_P (mode))
22125 return true;
22126 else
22127 return default_scalar_mode_supported_p (mode);
22130 /* Errors in the source file can cause expand_expr to return const0_rtx
22131 where we expect a vector. To avoid crashing, use one of the vector
22132 clear instructions. */
22134 static rtx
22135 safe_vector_operand (rtx x, enum machine_mode mode)
22137 if (x != const0_rtx)
22138 return x;
22139 x = gen_reg_rtx (mode);
22141 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
22142 : gen_rtx_SUBREG (DImode, x, 0)));
22143 return x;
22146 /* Function to expand ternary builtins. */
22147 static rtx
22148 arm_expand_ternop_builtin (enum insn_code icode,
22149 tree exp, rtx target)
22151 rtx pat;
22152 tree arg0 = CALL_EXPR_ARG (exp, 0);
22153 tree arg1 = CALL_EXPR_ARG (exp, 1);
22154 tree arg2 = CALL_EXPR_ARG (exp, 2);
22156 rtx op0 = expand_normal (arg0);
22157 rtx op1 = expand_normal (arg1);
22158 rtx op2 = expand_normal (arg2);
22159 rtx op3 = NULL_RTX;
22161 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
22162 lane operand depending on endianness. */
22163 bool builtin_sha1cpm_p = false;
22165 if (insn_data[icode].n_operands == 5)
22167 gcc_assert (icode == CODE_FOR_crypto_sha1c
22168 || icode == CODE_FOR_crypto_sha1p
22169 || icode == CODE_FOR_crypto_sha1m);
22170 builtin_sha1cpm_p = true;
22172 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22173 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22174 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22175 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
22178 if (VECTOR_MODE_P (mode0))
22179 op0 = safe_vector_operand (op0, mode0);
22180 if (VECTOR_MODE_P (mode1))
22181 op1 = safe_vector_operand (op1, mode1);
22182 if (VECTOR_MODE_P (mode2))
22183 op2 = safe_vector_operand (op2, mode2);
22185 if (! target
22186 || GET_MODE (target) != tmode
22187 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22188 target = gen_reg_rtx (tmode);
22190 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
22191 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
22192 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
22194 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22195 op0 = copy_to_mode_reg (mode0, op0);
22196 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22197 op1 = copy_to_mode_reg (mode1, op1);
22198 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22199 op2 = copy_to_mode_reg (mode2, op2);
22200 if (builtin_sha1cpm_p)
22201 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
22203 if (builtin_sha1cpm_p)
22204 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
22205 else
22206 pat = GEN_FCN (icode) (target, op0, op1, op2);
22207 if (! pat)
22208 return 0;
22209 emit_insn (pat);
22210 return target;
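/* Hedged usage sketch (added, not from the original source): the SHA1
   ternary builtins given special handling above are normally reached
   through the ACLE intrinsics in arm_neon.h when compiling for a
   crypto-capable FPU (e.g. -march=armv8-a -mfpu=crypto-neon-fp-armv8):

     #include <arm_neon.h>

     uint32x4_t
     sha1c_step (uint32x4_t abcd, uint32_t e, uint32x4_t wk)
     {
       return vsha1cq_u32 (abcd, e, wk);   /+ expected to expand via the
                                              crypto_sha1c pattern +/
     }

   The extra operand appended above (GEN_INT (TARGET_BIG_END ? 1 : 0))
   selects the vec_select lane and is invisible at this level.  */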
22213 /* Subroutine of arm_expand_builtin to take care of binop insns. */
22215 static rtx
22216 arm_expand_binop_builtin (enum insn_code icode,
22217 tree exp, rtx target)
22219 rtx pat;
22220 tree arg0 = CALL_EXPR_ARG (exp, 0);
22221 tree arg1 = CALL_EXPR_ARG (exp, 1);
22222 rtx op0 = expand_normal (arg0);
22223 rtx op1 = expand_normal (arg1);
22224 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22225 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22226 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22228 if (VECTOR_MODE_P (mode0))
22229 op0 = safe_vector_operand (op0, mode0);
22230 if (VECTOR_MODE_P (mode1))
22231 op1 = safe_vector_operand (op1, mode1);
22233 if (! target
22234 || GET_MODE (target) != tmode
22235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22236 target = gen_reg_rtx (tmode);
22238 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
22239 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
22241 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22242 op0 = copy_to_mode_reg (mode0, op0);
22243 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22244 op1 = copy_to_mode_reg (mode1, op1);
22246 pat = GEN_FCN (icode) (target, op0, op1);
22247 if (! pat)
22248 return 0;
22249 emit_insn (pat);
22250 return target;
22253 /* Subroutine of arm_expand_builtin to take care of unop insns. */
22255 static rtx
22256 arm_expand_unop_builtin (enum insn_code icode,
22257 tree exp, rtx target, int do_load)
22259 rtx pat;
22260 tree arg0 = CALL_EXPR_ARG (exp, 0);
22261 rtx op0 = expand_normal (arg0);
22262 rtx op1 = NULL_RTX;
22263 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22264 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22265 bool builtin_sha1h_p = false;
22267 if (insn_data[icode].n_operands == 3)
22269 gcc_assert (icode == CODE_FOR_crypto_sha1h);
22270 builtin_sha1h_p = true;
22273 if (! target
22274 || GET_MODE (target) != tmode
22275 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22276 target = gen_reg_rtx (tmode);
22277 if (do_load)
22278 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
22279 else
22281 if (VECTOR_MODE_P (mode0))
22282 op0 = safe_vector_operand (op0, mode0);
22284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22285 op0 = copy_to_mode_reg (mode0, op0);
22287 if (builtin_sha1h_p)
22288 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
22290 if (builtin_sha1h_p)
22291 pat = GEN_FCN (icode) (target, op0, op1);
22292 else
22293 pat = GEN_FCN (icode) (target, op0);
22294 if (! pat)
22295 return 0;
22296 emit_insn (pat);
22297 return target;
22300 typedef enum {
22301 NEON_ARG_COPY_TO_REG,
22302 NEON_ARG_CONSTANT,
22303 NEON_ARG_MEMORY,
22304 NEON_ARG_STOP
22305 } builtin_arg;
22307 #define NEON_MAX_BUILTIN_ARGS 5
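/* Illustrative note (added, not part of the original source):
   arm_expand_neon_args below receives one of these codes per operand,
   terminated by NEON_ARG_STOP.  A plain binary operation, for instance,
   is described as

     NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
     NEON_ARG_STOP

   (two register operands plus an optional trailing immediate), while the
   load and store forms pass NEON_ARG_MEMORY so the pointer argument is
   rewritten by neon_dereference_pointer below.  */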
22309 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
22310 and return an expression for the accessed memory.
22312 The intrinsic function operates on a block of registers that has
22313 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
22314 function references the memory at EXP of type TYPE and in mode
22315 MEM_MODE; this mode may be BLKmode if no more suitable mode is
22316 available. */
22318 static tree
22319 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
22320 enum machine_mode reg_mode,
22321 neon_builtin_type_mode type_mode)
22323 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
22324 tree elem_type, upper_bound, array_type;
22326 /* Work out the size of the register block in bytes. */
22327 reg_size = GET_MODE_SIZE (reg_mode);
22329 /* Work out the size of each vector in bytes. */
22330 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
22331 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
22333 /* Work out how many vectors there are. */
22334 gcc_assert (reg_size % vector_size == 0);
22335 nvectors = reg_size / vector_size;
22337 /* Work out the type of each element. */
22338 gcc_assert (POINTER_TYPE_P (type));
22339 elem_type = TREE_TYPE (type);
22341 /* Work out how many elements are being loaded or stored.
22342 MEM_MODE == REG_MODE implies a one-to-one mapping between register
22343 and memory elements; anything else implies a lane load or store. */
22344 if (mem_mode == reg_mode)
22345 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
22346 else
22347 nelems = nvectors;
22349 /* Create a type that describes the full access. */
22350 upper_bound = build_int_cst (size_type_node, nelems - 1);
22351 array_type = build_array_type (elem_type, build_index_type (upper_bound));
22353 /* Dereference EXP using that type. */
22354 return fold_build2 (MEM_REF, array_type, exp,
22355 build_int_cst (build_pointer_type (array_type), 0));
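/* Worked example (added for illustration, not in the original source):
   for a plain quadword load such as vld1q_s32 the register block is a
   single V4SImode vector, so reg_size == 16, vector_size == 16 and
   nvectors == 1; elem_type is int32_t and, because mem_mode == reg_mode,
   nelems == 16 * 1 / 4 == 4.  The pointer is therefore dereferenced as an
   int32_t[4] array, i.e. the MEM_REF covers all four loaded lanes.  For a
   lane load or store mem_mode != reg_mode and only nvectors elements (one
   per vector) are accessed.  */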
22358 /* Expand a Neon builtin. */
22359 static rtx
22360 arm_expand_neon_args (rtx target, int icode, int have_retval,
22361 neon_builtin_type_mode type_mode,
22362 tree exp, int fcode, ...)
22364 va_list ap;
22365 rtx pat;
22366 tree arg[NEON_MAX_BUILTIN_ARGS];
22367 rtx op[NEON_MAX_BUILTIN_ARGS];
22368 tree arg_type;
22369 tree formals;
22370 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22371 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
22372 enum machine_mode other_mode;
22373 int argc = 0;
22374 int opno;
22376 if (have_retval
22377 && (!target
22378 || GET_MODE (target) != tmode
22379 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
22380 target = gen_reg_rtx (tmode);
22382 va_start (ap, fcode);
22384 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
22386 for (;;)
22388 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
22390 if (thisarg == NEON_ARG_STOP)
22391 break;
22392 else
22394 opno = argc + have_retval;
22395 mode[argc] = insn_data[icode].operand[opno].mode;
22396 arg[argc] = CALL_EXPR_ARG (exp, argc);
22397 arg_type = TREE_VALUE (formals);
22398 if (thisarg == NEON_ARG_MEMORY)
22400 other_mode = insn_data[icode].operand[1 - opno].mode;
22401 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
22402 mode[argc], other_mode,
22403 type_mode);
22406 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that an rtx satisfying
22407 MEM_P is returned. */
22408 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
22409 (thisarg == NEON_ARG_MEMORY
22410 ? EXPAND_MEMORY : EXPAND_NORMAL));
22412 switch (thisarg)
22414 case NEON_ARG_COPY_TO_REG:
22415 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
22416 if (!(*insn_data[icode].operand[opno].predicate)
22417 (op[argc], mode[argc]))
22418 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
22419 break;
22421 case NEON_ARG_CONSTANT:
22422 /* FIXME: This error message is somewhat unhelpful. */
22423 if (!(*insn_data[icode].operand[opno].predicate)
22424 (op[argc], mode[argc]))
22425 error ("argument must be a constant");
22426 break;
22428 case NEON_ARG_MEMORY:
22429 /* Check if expand failed. */
22430 if (op[argc] == const0_rtx)
22431 return 0;
22432 gcc_assert (MEM_P (op[argc]));
22433 PUT_MODE (op[argc], mode[argc]);
22434 /* ??? arm_neon.h uses the same built-in functions for signed
22435 and unsigned accesses, casting where necessary. This isn't
22436 alias safe. */
22437 set_mem_alias_set (op[argc], 0);
22438 if (!(*insn_data[icode].operand[opno].predicate)
22439 (op[argc], mode[argc]))
22440 op[argc] = (replace_equiv_address
22441 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
22442 break;
22444 case NEON_ARG_STOP:
22445 gcc_unreachable ();
22448 argc++;
22449 formals = TREE_CHAIN (formals);
22453 va_end (ap);
22455 if (have_retval)
22456 switch (argc)
22458 case 1:
22459 pat = GEN_FCN (icode) (target, op[0]);
22460 break;
22462 case 2:
22463 pat = GEN_FCN (icode) (target, op[0], op[1]);
22464 break;
22466 case 3:
22467 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
22468 break;
22470 case 4:
22471 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
22472 break;
22474 case 5:
22475 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
22476 break;
22478 default:
22479 gcc_unreachable ();
22481 else
22482 switch (argc)
22484 case 1:
22485 pat = GEN_FCN (icode) (op[0]);
22486 break;
22488 case 2:
22489 pat = GEN_FCN (icode) (op[0], op[1]);
22490 break;
22492 case 3:
22493 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
22494 break;
22496 case 4:
22497 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
22498 break;
22500 case 5:
22501 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
22502 break;
22504 default:
22505 gcc_unreachable ();
22508 if (!pat)
22509 return 0;
22511 emit_insn (pat);
22513 return target;
22516 /* Expand a Neon builtin. These are "special" because they don't have symbolic
22517 constants defined per-instruction or per instruction-variant. Instead, the
22518 required info is looked up in the table neon_builtin_data. */
22519 static rtx
22520 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
22522 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
22523 neon_itype itype = d->itype;
22524 enum insn_code icode = d->code;
22525 neon_builtin_type_mode type_mode = d->mode;
22527 switch (itype)
22529 case NEON_UNOP:
22530 case NEON_CONVERT:
22531 case NEON_DUPLANE:
22532 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22533 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22535 case NEON_BINOP:
22536 case NEON_SETLANE:
22537 case NEON_SCALARMUL:
22538 case NEON_SCALARMULL:
22539 case NEON_SCALARMULH:
22540 case NEON_SHIFTINSERT:
22541 case NEON_LOGICBINOP:
22542 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22543 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22544 NEON_ARG_STOP);
22546 case NEON_TERNOP:
22547 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22548 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22549 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22551 case NEON_GETLANE:
22552 case NEON_FIXCONV:
22553 case NEON_SHIFTIMM:
22554 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22555 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
22556 NEON_ARG_STOP);
22558 case NEON_CREATE:
22559 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22560 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22562 case NEON_DUP:
22563 case NEON_RINT:
22564 case NEON_SPLIT:
22565 case NEON_FLOAT_WIDEN:
22566 case NEON_FLOAT_NARROW:
22567 case NEON_REINTERP:
22568 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22569 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22571 case NEON_COMBINE:
22572 case NEON_VTBL:
22573 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22574 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22576 case NEON_RESULTPAIR:
22577 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22578 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22579 NEON_ARG_STOP);
22581 case NEON_LANEMUL:
22582 case NEON_LANEMULL:
22583 case NEON_LANEMULH:
22584 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22585 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22586 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22588 case NEON_LANEMAC:
22589 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22590 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22591 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22593 case NEON_SHIFTACC:
22594 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22595 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22596 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22598 case NEON_SCALARMAC:
22599 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22600 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22601 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22603 case NEON_SELECT:
22604 case NEON_VTBX:
22605 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22606 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22607 NEON_ARG_STOP);
22609 case NEON_LOAD1:
22610 case NEON_LOADSTRUCT:
22611 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22612 NEON_ARG_MEMORY, NEON_ARG_STOP);
22614 case NEON_LOAD1LANE:
22615 case NEON_LOADSTRUCTLANE:
22616 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22617 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22618 NEON_ARG_STOP);
22620 case NEON_STORE1:
22621 case NEON_STORESTRUCT:
22622 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22623 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22625 case NEON_STORE1LANE:
22626 case NEON_STORESTRUCTLANE:
22627 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22628 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22629 NEON_ARG_STOP);
22632 gcc_unreachable ();
22635 /* Emit code to reinterpret one Neon type as another, without altering bits. */
22636 void
22637 neon_reinterpret (rtx dest, rtx src)
22639 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
22642 /* Emit code to place a Neon pair result in two adjacent memory locations,
22643 using a pair of result registers of the same mode. */
22644 void
22645 neon_emit_pair_result_insn (enum machine_mode mode,
22646 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
22647 rtx op1, rtx op2)
22649 rtx mem = gen_rtx_MEM (mode, destaddr);
22650 rtx tmp1 = gen_reg_rtx (mode);
22651 rtx tmp2 = gen_reg_rtx (mode);
22653 emit_insn (intfn (tmp1, op1, op2, tmp2));
22655 emit_move_insn (mem, tmp1);
22656 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
22657 emit_move_insn (mem, tmp2);
22660 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22661 not to early-clobber SRC registers in the process.
22663 We assume that the operands described by SRC and DEST represent a
22664 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22665 number of components into which the copy has been decomposed. */
22666 void
22667 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22669 unsigned int i;
22671 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22672 || REGNO (operands[0]) < REGNO (operands[1]))
22674 for (i = 0; i < count; i++)
22676 operands[2 * i] = dest[i];
22677 operands[2 * i + 1] = src[i];
22680 else
22682 for (i = 0; i < count; i++)
22684 operands[2 * i] = dest[count - i - 1];
22685 operands[2 * i + 1] = src[count - i - 1];
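/* Worked example (added for illustration, not in the original source):
   copying the two-register block {d2,d3} from {d1,d2} overlaps in d2 and
   REGNO (dest) > REGNO (src), so the loop above emits the moves in
   reverse order: d3 <- d2 first, then d2 <- d1, and the overlapping
   source register is read before it is overwritten.  Copying {d1,d2}
   from {d2,d3} takes the forward branch, for the same reason.  */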
22690 /* Split operands into moves from op[1] + op[2] into op[0]. */
22692 void
22693 neon_split_vcombine (rtx operands[3])
22695 unsigned int dest = REGNO (operands[0]);
22696 unsigned int src1 = REGNO (operands[1]);
22697 unsigned int src2 = REGNO (operands[2]);
22698 enum machine_mode halfmode = GET_MODE (operands[1]);
22699 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22700 rtx destlo, desthi;
22702 if (src1 == dest && src2 == dest + halfregs)
22704 /* No-op move. Can't split to nothing; emit something. */
22705 emit_note (NOTE_INSN_DELETED);
22706 return;
22709 /* Preserve register attributes for variable tracking. */
22710 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22711 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22712 GET_MODE_SIZE (halfmode));
22714 /* Special case of reversed high/low parts. Use VSWP. */
22715 if (src2 == dest && src1 == dest + halfregs)
22717 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
22718 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
22719 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22720 return;
22723 if (!reg_overlap_mentioned_p (operands[2], destlo))
22725 /* Try to avoid unnecessary moves if part of the result
22726 is in the right place already. */
22727 if (src1 != dest)
22728 emit_move_insn (destlo, operands[1]);
22729 if (src2 != dest + halfregs)
22730 emit_move_insn (desthi, operands[2]);
22732 else
22734 if (src2 != dest + halfregs)
22735 emit_move_insn (desthi, operands[2]);
22736 if (src1 != dest)
22737 emit_move_insn (destlo, operands[1]);
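/* Worked example (added for illustration, not in the original source):
   for q0 = vcombine (d0, d1) both halves are already in place
   (src1 == dest and src2 == dest + halfregs), so only a deleted-insn note
   is emitted; for q0 = vcombine (d1, d0) the halves are exactly swapped
   and the PARALLEL above is emitted so that it can match a VSWP pattern;
   otherwise the two halves are moved individually, ordered so that an
   overlapping source half is not clobbered before it is read.  */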
22741 /* Expand an expression EXP that calls a built-in function,
22742 with result going to TARGET if that's convenient
22743 (and in mode MODE if that's convenient).
22744 SUBTARGET may be used as the target for computing one of EXP's operands.
22745 IGNORE is nonzero if the value is to be ignored. */
22747 static rtx
22748 arm_expand_builtin (tree exp,
22749 rtx target,
22750 rtx subtarget ATTRIBUTE_UNUSED,
22751 enum machine_mode mode ATTRIBUTE_UNUSED,
22752 int ignore ATTRIBUTE_UNUSED)
22754 const struct builtin_description * d;
22755 enum insn_code icode;
22756 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22757 tree arg0;
22758 tree arg1;
22759 tree arg2;
22760 rtx op0;
22761 rtx op1;
22762 rtx op2;
22763 rtx pat;
22764 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22765 size_t i;
22766 enum machine_mode tmode;
22767 enum machine_mode mode0;
22768 enum machine_mode mode1;
22769 enum machine_mode mode2;
22770 int opint;
22771 int selector;
22772 int mask;
22773 int imm;
22775 if (fcode >= ARM_BUILTIN_NEON_BASE)
22776 return arm_expand_neon_builtin (fcode, exp, target);
22778 switch (fcode)
22780 case ARM_BUILTIN_TEXTRMSB:
22781 case ARM_BUILTIN_TEXTRMUB:
22782 case ARM_BUILTIN_TEXTRMSH:
22783 case ARM_BUILTIN_TEXTRMUH:
22784 case ARM_BUILTIN_TEXTRMSW:
22785 case ARM_BUILTIN_TEXTRMUW:
22786 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
22787 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
22788 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
22789 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
22790 : CODE_FOR_iwmmxt_textrmw);
22792 arg0 = CALL_EXPR_ARG (exp, 0);
22793 arg1 = CALL_EXPR_ARG (exp, 1);
22794 op0 = expand_normal (arg0);
22795 op1 = expand_normal (arg1);
22796 tmode = insn_data[icode].operand[0].mode;
22797 mode0 = insn_data[icode].operand[1].mode;
22798 mode1 = insn_data[icode].operand[2].mode;
22800 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22801 op0 = copy_to_mode_reg (mode0, op0);
22802 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22804 /* @@@ better error message */
22805 error ("selector must be an immediate");
22806 return gen_reg_rtx (tmode);
22809 opint = INTVAL (op1);
22810 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
22812 if (opint > 7 || opint < 0)
22813 error ("the range of selector should be in 0 to 7");
22815 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
22817 if (opint > 3 || opint < 0)
22818 error ("the range of selector should be in 0 to 3");
22820 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
22822 if (opint > 1 || opint < 0)
22823 error ("the range of selector should be in 0 to 1");
22826 if (target == 0
22827 || GET_MODE (target) != tmode
22828 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22829 target = gen_reg_rtx (tmode);
22830 pat = GEN_FCN (icode) (target, op0, op1);
22831 if (! pat)
22832 return 0;
22833 emit_insn (pat);
22834 return target;
22836 case ARM_BUILTIN_WALIGNI:
22837 /* If op2 is an immediate, call waligni, else call walignr. */
22838 arg0 = CALL_EXPR_ARG (exp, 0);
22839 arg1 = CALL_EXPR_ARG (exp, 1);
22840 arg2 = CALL_EXPR_ARG (exp, 2);
22841 op0 = expand_normal (arg0);
22842 op1 = expand_normal (arg1);
22843 op2 = expand_normal (arg2);
22844 if (CONST_INT_P (op2))
22846 icode = CODE_FOR_iwmmxt_waligni;
22847 tmode = insn_data[icode].operand[0].mode;
22848 mode0 = insn_data[icode].operand[1].mode;
22849 mode1 = insn_data[icode].operand[2].mode;
22850 mode2 = insn_data[icode].operand[3].mode;
22851 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22852 op0 = copy_to_mode_reg (mode0, op0);
22853 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22854 op1 = copy_to_mode_reg (mode1, op1);
22855 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
22856 selector = INTVAL (op2);
22857 if (selector > 7 || selector < 0)
22858 error ("the range of selector should be in 0 to 7");
22860 else
22862 icode = CODE_FOR_iwmmxt_walignr;
22863 tmode = insn_data[icode].operand[0].mode;
22864 mode0 = insn_data[icode].operand[1].mode;
22865 mode1 = insn_data[icode].operand[2].mode;
22866 mode2 = insn_data[icode].operand[3].mode;
22867 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22868 op0 = copy_to_mode_reg (mode0, op0);
22869 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22870 op1 = copy_to_mode_reg (mode1, op1);
22871 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
22872 op2 = copy_to_mode_reg (mode2, op2);
22874 if (target == 0
22875 || GET_MODE (target) != tmode
22876 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22877 target = gen_reg_rtx (tmode);
22878 pat = GEN_FCN (icode) (target, op0, op1, op2);
22879 if (!pat)
22880 return 0;
22881 emit_insn (pat);
22882 return target;
22884 case ARM_BUILTIN_TINSRB:
22885 case ARM_BUILTIN_TINSRH:
22886 case ARM_BUILTIN_TINSRW:
22887 case ARM_BUILTIN_WMERGE:
22888 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
22889 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
22890 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
22891 : CODE_FOR_iwmmxt_tinsrw);
22892 arg0 = CALL_EXPR_ARG (exp, 0);
22893 arg1 = CALL_EXPR_ARG (exp, 1);
22894 arg2 = CALL_EXPR_ARG (exp, 2);
22895 op0 = expand_normal (arg0);
22896 op1 = expand_normal (arg1);
22897 op2 = expand_normal (arg2);
22898 tmode = insn_data[icode].operand[0].mode;
22899 mode0 = insn_data[icode].operand[1].mode;
22900 mode1 = insn_data[icode].operand[2].mode;
22901 mode2 = insn_data[icode].operand[3].mode;
22903 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22904 op0 = copy_to_mode_reg (mode0, op0);
22905 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22906 op1 = copy_to_mode_reg (mode1, op1);
22907 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22909 error ("selector must be an immediate");
22910 return const0_rtx;
22912 if (icode == CODE_FOR_iwmmxt_wmerge)
22914 selector = INTVAL (op2);
22915 if (selector > 7 || selector < 0)
22916 error ("the range of selector should be in 0 to 7");
22918 if ((icode == CODE_FOR_iwmmxt_tinsrb)
22919 || (icode == CODE_FOR_iwmmxt_tinsrh)
22920 || (icode == CODE_FOR_iwmmxt_tinsrw))
22922 mask = 0x01;
22923 selector = INTVAL (op2);
22924 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
22925 error ("the selector must be in the range 0 to 7");
22926 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
22927 error ("the selector must be in the range 0 to 3");
22928 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
22929 error ("the selector must be in the range 0 to 1");
22930 mask <<= selector;
22931 op2 = GEN_INT (mask);
22933 if (target == 0
22934 || GET_MODE (target) != tmode
22935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22936 target = gen_reg_rtx (tmode);
22937 pat = GEN_FCN (icode) (target, op0, op1, op2);
22938 if (! pat)
22939 return 0;
22940 emit_insn (pat);
22941 return target;
22943 case ARM_BUILTIN_SETWCGR0:
22944 case ARM_BUILTIN_SETWCGR1:
22945 case ARM_BUILTIN_SETWCGR2:
22946 case ARM_BUILTIN_SETWCGR3:
22947 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
22948 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
22949 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
22950 : CODE_FOR_iwmmxt_setwcgr3);
22951 arg0 = CALL_EXPR_ARG (exp, 0);
22952 op0 = expand_normal (arg0);
22953 mode0 = insn_data[icode].operand[0].mode;
22954 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
22955 op0 = copy_to_mode_reg (mode0, op0);
22956 pat = GEN_FCN (icode) (op0);
22957 if (!pat)
22958 return 0;
22959 emit_insn (pat);
22960 return 0;
22962 case ARM_BUILTIN_GETWCGR0:
22963 case ARM_BUILTIN_GETWCGR1:
22964 case ARM_BUILTIN_GETWCGR2:
22965 case ARM_BUILTIN_GETWCGR3:
22966 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
22967 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
22968 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
22969 : CODE_FOR_iwmmxt_getwcgr3);
22970 tmode = insn_data[icode].operand[0].mode;
22971 if (target == 0
22972 || GET_MODE (target) != tmode
22973 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22974 target = gen_reg_rtx (tmode);
22975 pat = GEN_FCN (icode) (target);
22976 if (!pat)
22977 return 0;
22978 emit_insn (pat);
22979 return target;
22981 case ARM_BUILTIN_WSHUFH:
22982 icode = CODE_FOR_iwmmxt_wshufh;
22983 arg0 = CALL_EXPR_ARG (exp, 0);
22984 arg1 = CALL_EXPR_ARG (exp, 1);
22985 op0 = expand_normal (arg0);
22986 op1 = expand_normal (arg1);
22987 tmode = insn_data[icode].operand[0].mode;
22988 mode1 = insn_data[icode].operand[1].mode;
22989 mode2 = insn_data[icode].operand[2].mode;
22991 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
22992 op0 = copy_to_mode_reg (mode1, op0);
22993 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
22995 error ("mask must be an immediate");
22996 return const0_rtx;
22998 selector = INTVAL (op1);
22999 if (selector < 0 || selector > 255)
23000 error ("the mask must be in the range 0 to 255");
23001 if (target == 0
23002 || GET_MODE (target) != tmode
23003 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23004 target = gen_reg_rtx (tmode);
23005 pat = GEN_FCN (icode) (target, op0, op1);
23006 if (! pat)
23007 return 0;
23008 emit_insn (pat);
23009 return target;
23011 case ARM_BUILTIN_WMADDS:
23012 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
23013 case ARM_BUILTIN_WMADDSX:
23014 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
23015 case ARM_BUILTIN_WMADDSN:
23016 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
23017 case ARM_BUILTIN_WMADDU:
23018 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
23019 case ARM_BUILTIN_WMADDUX:
23020 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
23021 case ARM_BUILTIN_WMADDUN:
23022 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
23023 case ARM_BUILTIN_WSADBZ:
23024 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
23025 case ARM_BUILTIN_WSADHZ:
23026 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
23028 /* Several three-argument builtins. */
23029 case ARM_BUILTIN_WMACS:
23030 case ARM_BUILTIN_WMACU:
23031 case ARM_BUILTIN_TMIA:
23032 case ARM_BUILTIN_TMIAPH:
23033 case ARM_BUILTIN_TMIATT:
23034 case ARM_BUILTIN_TMIATB:
23035 case ARM_BUILTIN_TMIABT:
23036 case ARM_BUILTIN_TMIABB:
23037 case ARM_BUILTIN_WQMIABB:
23038 case ARM_BUILTIN_WQMIABT:
23039 case ARM_BUILTIN_WQMIATB:
23040 case ARM_BUILTIN_WQMIATT:
23041 case ARM_BUILTIN_WQMIABBN:
23042 case ARM_BUILTIN_WQMIABTN:
23043 case ARM_BUILTIN_WQMIATBN:
23044 case ARM_BUILTIN_WQMIATTN:
23045 case ARM_BUILTIN_WMIABB:
23046 case ARM_BUILTIN_WMIABT:
23047 case ARM_BUILTIN_WMIATB:
23048 case ARM_BUILTIN_WMIATT:
23049 case ARM_BUILTIN_WMIABBN:
23050 case ARM_BUILTIN_WMIABTN:
23051 case ARM_BUILTIN_WMIATBN:
23052 case ARM_BUILTIN_WMIATTN:
23053 case ARM_BUILTIN_WMIAWBB:
23054 case ARM_BUILTIN_WMIAWBT:
23055 case ARM_BUILTIN_WMIAWTB:
23056 case ARM_BUILTIN_WMIAWTT:
23057 case ARM_BUILTIN_WMIAWBBN:
23058 case ARM_BUILTIN_WMIAWBTN:
23059 case ARM_BUILTIN_WMIAWTBN:
23060 case ARM_BUILTIN_WMIAWTTN:
23061 case ARM_BUILTIN_WSADB:
23062 case ARM_BUILTIN_WSADH:
23063 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
23064 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
23065 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
23066 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
23067 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
23068 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
23069 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
23070 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
23071 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
23072 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
23073 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
23074 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
23075 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
23076 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
23077 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
23078 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
23079 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
23080 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
23081 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
23082 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
23083 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
23084 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
23085 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
23086 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
23087 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
23088 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
23089 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
23090 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
23091 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
23092 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
23093 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
23094 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
23095 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
23096 : CODE_FOR_iwmmxt_wsadh);
23097 arg0 = CALL_EXPR_ARG (exp, 0);
23098 arg1 = CALL_EXPR_ARG (exp, 1);
23099 arg2 = CALL_EXPR_ARG (exp, 2);
23100 op0 = expand_normal (arg0);
23101 op1 = expand_normal (arg1);
23102 op2 = expand_normal (arg2);
23103 tmode = insn_data[icode].operand[0].mode;
23104 mode0 = insn_data[icode].operand[1].mode;
23105 mode1 = insn_data[icode].operand[2].mode;
23106 mode2 = insn_data[icode].operand[3].mode;
23108 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23109 op0 = copy_to_mode_reg (mode0, op0);
23110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
23111 op1 = copy_to_mode_reg (mode1, op1);
23112 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
23113 op2 = copy_to_mode_reg (mode2, op2);
23114 if (target == 0
23115 || GET_MODE (target) != tmode
23116 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23117 target = gen_reg_rtx (tmode);
23118 pat = GEN_FCN (icode) (target, op0, op1, op2);
23119 if (! pat)
23120 return 0;
23121 emit_insn (pat);
23122 return target;
23124 case ARM_BUILTIN_WZERO:
23125 target = gen_reg_rtx (DImode);
23126 emit_insn (gen_iwmmxt_clrdi (target));
23127 return target;
23129 case ARM_BUILTIN_WSRLHI:
23130 case ARM_BUILTIN_WSRLWI:
23131 case ARM_BUILTIN_WSRLDI:
23132 case ARM_BUILTIN_WSLLHI:
23133 case ARM_BUILTIN_WSLLWI:
23134 case ARM_BUILTIN_WSLLDI:
23135 case ARM_BUILTIN_WSRAHI:
23136 case ARM_BUILTIN_WSRAWI:
23137 case ARM_BUILTIN_WSRADI:
23138 case ARM_BUILTIN_WRORHI:
23139 case ARM_BUILTIN_WRORWI:
23140 case ARM_BUILTIN_WRORDI:
23141 case ARM_BUILTIN_WSRLH:
23142 case ARM_BUILTIN_WSRLW:
23143 case ARM_BUILTIN_WSRLD:
23144 case ARM_BUILTIN_WSLLH:
23145 case ARM_BUILTIN_WSLLW:
23146 case ARM_BUILTIN_WSLLD:
23147 case ARM_BUILTIN_WSRAH:
23148 case ARM_BUILTIN_WSRAW:
23149 case ARM_BUILTIN_WSRAD:
23150 case ARM_BUILTIN_WRORH:
23151 case ARM_BUILTIN_WRORW:
23152 case ARM_BUILTIN_WRORD:
23153 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
23154 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
23155 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
23156 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
23157 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
23158 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
23159 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
23160 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
23161 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
23162 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
23163 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
23164 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
23165 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
23166 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
23167 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
23168 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
23169 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
23170 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
23171 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
23172 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
23173 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
23174 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
23175 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
23176 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
23177 : CODE_FOR_nothing);
23178 arg1 = CALL_EXPR_ARG (exp, 1);
23179 op1 = expand_normal (arg1);
23180 if (GET_MODE (op1) == VOIDmode)
23182 imm = INTVAL (op1);
23183 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
23184 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
23185 && (imm < 0 || imm > 32))
23187 if (fcode == ARM_BUILTIN_WRORHI)
23188 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
23189 else if (fcode == ARM_BUILTIN_WRORWI)
23190 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
23191 else if (fcode == ARM_BUILTIN_WRORH)
23192 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
23193 else
23194 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
23196 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
23197 && (imm < 0 || imm > 64))
23199 if (fcode == ARM_BUILTIN_WRORDI)
23200 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
23201 else
23202 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
23204 else if (imm < 0)
23206 if (fcode == ARM_BUILTIN_WSRLHI)
23207 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
23208 else if (fcode == ARM_BUILTIN_WSRLWI)
23209 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
23210 else if (fcode == ARM_BUILTIN_WSRLDI)
23211 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
23212 else if (fcode == ARM_BUILTIN_WSLLHI)
23213 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
23214 else if (fcode == ARM_BUILTIN_WSLLWI)
23215 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
23216 else if (fcode == ARM_BUILTIN_WSLLDI)
23217 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
23218 else if (fcode == ARM_BUILTIN_WSRAHI)
23219 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
23220 else if (fcode == ARM_BUILTIN_WSRAWI)
23221 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
23222 else if (fcode == ARM_BUILTIN_WSRADI)
23223 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
23224 else if (fcode == ARM_BUILTIN_WSRLH)
23225 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
23226 else if (fcode == ARM_BUILTIN_WSRLW)
23227 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
23228 else if (fcode == ARM_BUILTIN_WSRLD)
23229 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
23230 else if (fcode == ARM_BUILTIN_WSLLH)
23231 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
23232 else if (fcode == ARM_BUILTIN_WSLLW)
23233 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
23234 else if (fcode == ARM_BUILTIN_WSLLD)
23235 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
23236 else if (fcode == ARM_BUILTIN_WSRAH)
23237 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
23238 else if (fcode == ARM_BUILTIN_WSRAW)
23239 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
23240 else
23241 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
23244 return arm_expand_binop_builtin (icode, exp, target);
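/* Editorial sketch (illustrative only; exact prototypes come from the
   target's mmintrin.h): user code such as

     #include <mmintrin.h>
     __m64 rotate (__m64 x) { return _mm_rori_pi16 (x, 3); }

   reaches this case as ARM_BUILTIN_WRORHI and is rejected with the
   diagnostics above if the immediate count lies outside the checked range.  */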
23246 default:
23247 break;
23250 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
23251 if (d->code == (const enum arm_builtins) fcode)
23252 return arm_expand_binop_builtin (d->icode, exp, target);
23254 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
23255 if (d->code == (const enum arm_builtins) fcode)
23256 return arm_expand_unop_builtin (d->icode, exp, target, 0);
23258 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
23259 if (d->code == (const enum arm_builtins) fcode)
23260 return arm_expand_ternop_builtin (d->icode, exp, target);
23262 /* @@@ Should really do something sensible here. */
23263 return NULL_RTX;
23266 /* Return the number (counting from 0) of
23267 the least significant set bit in MASK. */
23269 inline static int
23270 number_of_first_bit_set (unsigned mask)
23272 return ctz_hwi (mask);
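/* Worked example (editorial): number_of_first_bit_set (0x68) == 3, since
   0x68 is 0b1101000 and bit 3 is its lowest set bit.  */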
23275 /* Like emit_multi_reg_push, but allowing for a different set of
23276 registers to be described as saved. MASK is the set of registers
23277 to be saved; REAL_REGS is the set of registers to be described as
23278 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23280 static rtx
23281 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23283 unsigned long regno;
23284 rtx par[10], tmp, reg, insn;
23285 int i, j;
23287 /* Build the parallel of the registers actually being stored. */
23288 for (i = 0; mask; ++i, mask &= mask - 1)
23290 regno = ctz_hwi (mask);
23291 reg = gen_rtx_REG (SImode, regno);
23293 if (i == 0)
23294 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23295 else
23296 tmp = gen_rtx_USE (VOIDmode, reg);
23298 par[i] = tmp;
23301 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23302 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23303 tmp = gen_frame_mem (BLKmode, tmp);
23304 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23305 par[0] = tmp;
23307 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23308 insn = emit_insn (tmp);
23310 /* Always build the stack adjustment note for unwind info. */
23311 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23312 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23313 par[0] = tmp;
23315 /* Build the parallel of the registers recorded as saved for unwind. */
23316 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23318 regno = ctz_hwi (real_regs);
23319 reg = gen_rtx_REG (SImode, regno);
23321 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23322 tmp = gen_frame_mem (SImode, tmp);
23323 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23324 RTX_FRAME_RELATED_P (tmp) = 1;
23325 par[j + 1] = tmp;
23328 if (j == 0)
23329 tmp = par[0];
23330 else
23332 RTX_FRAME_RELATED_P (par[0]) = 1;
23333 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23336 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23338 return insn;
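/* Illustrative usage (editorial addition, not from the original sources): the
   high-register save path in thumb1_expand_prologue copies, say, r8 and r9
   into free low registers and then calls, schematically,

     insn = thumb1_emit_multi_reg_push (0x000c, 0x0300);

   Here 0x000c (r2, r3) selects what the PUSH instruction actually stores,
   while 0x0300 (r8, r9) makes the REG_FRAME_RELATED_EXPR note describe those
   stack slots as holding r8 and r9.  Passing real_regs == 0 records only the
   stack-pointer adjustment.  */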
23341 /* Emit code to push or pop registers to or from the stack. F is the
23342 assembly file. MASK is the registers to pop. */
23343 static void
23344 thumb_pop (FILE *f, unsigned long mask)
23346 int regno;
23347 int lo_mask = mask & 0xFF;
23348 int pushed_words = 0;
23350 gcc_assert (mask);
23352 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23354 /* Special case. Do not generate a POP PC statement here, do it in
23355 thumb_exit() */
23356 thumb_exit (f, -1);
23357 return;
23360 fprintf (f, "\tpop\t{");
23362 /* Look at the low registers first. */
23363 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23365 if (lo_mask & 1)
23367 asm_fprintf (f, "%r", regno);
23369 if ((lo_mask & ~1) != 0)
23370 fprintf (f, ", ");
23372 pushed_words++;
23376 if (mask & (1 << PC_REGNUM))
23378 /* Catch popping the PC. */
23379 if (TARGET_INTERWORK || TARGET_BACKTRACE
23380 || crtl->calls_eh_return)
23382 /* The PC is never popped directly; instead
23383 it is popped into r3 and then BX is used. */
23384 fprintf (f, "}\n");
23386 thumb_exit (f, -1);
23388 return;
23390 else
23392 if (mask & 0xFF)
23393 fprintf (f, ", ");
23395 asm_fprintf (f, "%r", PC_REGNUM);
23399 fprintf (f, "}\n");
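/* Editorial example (illustrative): for mask == 0x00b0 (r4, r5, r7) this emits

     pop  {r4, r5, r7}

   and for mask == 0x80f0 (r4-r7 plus the PC) on a target without
   interworking, backtrace or __builtin_eh_return it emits
   "pop {r4, r5, r6, r7, pc}"; otherwise the PC is handed off to thumb_exit
   as shown above.  */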
23402 /* Generate code to return from a thumb function.
23403 If 'reg_containing_return_addr' is -1, then the return address is
23404 actually on the stack, at the stack pointer. */
23405 static void
23406 thumb_exit (FILE *f, int reg_containing_return_addr)
23408 unsigned regs_available_for_popping;
23409 unsigned regs_to_pop;
23410 int pops_needed;
23411 unsigned available;
23412 unsigned required;
23413 int mode;
23414 int size;
23415 int restore_a4 = FALSE;
23417 /* Compute the registers we need to pop. */
23418 regs_to_pop = 0;
23419 pops_needed = 0;
23421 if (reg_containing_return_addr == -1)
23423 regs_to_pop |= 1 << LR_REGNUM;
23424 ++pops_needed;
23427 if (TARGET_BACKTRACE)
23429 /* Restore the (ARM) frame pointer and stack pointer. */
23430 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23431 pops_needed += 2;
23434 /* If there is nothing to pop then just emit the BX instruction and
23435 return. */
23436 if (pops_needed == 0)
23438 if (crtl->calls_eh_return)
23439 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23441 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23442 return;
23444 /* Otherwise if we are not supporting interworking and we have not created
23445 a backtrace structure and the function was not entered in ARM mode then
23446 just pop the return address straight into the PC. */
23447 else if (!TARGET_INTERWORK
23448 && !TARGET_BACKTRACE
23449 && !is_called_in_ARM_mode (current_function_decl)
23450 && !crtl->calls_eh_return)
23452 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23453 return;
23456 /* Find out how many of the (return) argument registers we can corrupt. */
23457 regs_available_for_popping = 0;
23459 /* If returning via __builtin_eh_return, the bottom three registers
23460 all contain information needed for the return. */
23461 if (crtl->calls_eh_return)
23462 size = 12;
23463 else
23465 /* We can deduce the registers used from the function's
23466 return value. This is more reliable than examining
23467 df_regs_ever_live_p () because that will be set if the register is
23468 ever used in the function, not just if the register is used
23469 to hold a return value. */
23471 if (crtl->return_rtx != 0)
23472 mode = GET_MODE (crtl->return_rtx);
23473 else
23474 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23476 size = GET_MODE_SIZE (mode);
23478 if (size == 0)
23480 /* In a void function we can use any argument register.
23481 In a function that returns a structure on the stack
23482 we can use the second and third argument registers. */
23483 if (mode == VOIDmode)
23484 regs_available_for_popping =
23485 (1 << ARG_REGISTER (1))
23486 | (1 << ARG_REGISTER (2))
23487 | (1 << ARG_REGISTER (3));
23488 else
23489 regs_available_for_popping =
23490 (1 << ARG_REGISTER (2))
23491 | (1 << ARG_REGISTER (3));
23493 else if (size <= 4)
23494 regs_available_for_popping =
23495 (1 << ARG_REGISTER (2))
23496 | (1 << ARG_REGISTER (3));
23497 else if (size <= 8)
23498 regs_available_for_popping =
23499 (1 << ARG_REGISTER (3));
23502 /* Match registers to be popped with registers into which we pop them. */
23503 for (available = regs_available_for_popping,
23504 required = regs_to_pop;
23505 required != 0 && available != 0;
23506 available &= ~(available & - available),
23507 required &= ~(required & - required))
23508 -- pops_needed;
23510 /* If we have any popping registers left over, remove them. */
23511 if (available > 0)
23512 regs_available_for_popping &= ~available;
23514 /* Otherwise if we need another popping register we can use
23515 the fourth argument register. */
23516 else if (pops_needed)
23518 /* If we have not found any free argument registers and
23519 reg a4 contains the return address, we must move it. */
23520 if (regs_available_for_popping == 0
23521 && reg_containing_return_addr == LAST_ARG_REGNUM)
23523 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23524 reg_containing_return_addr = LR_REGNUM;
23526 else if (size > 12)
23528 /* Register a4 is being used to hold part of the return value,
23529 but we have dire need of a free, low register. */
23530 restore_a4 = TRUE;
23532 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23535 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23537 /* The fourth argument register is available. */
23538 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23540 --pops_needed;
23544 /* Pop as many registers as we can. */
23545 thumb_pop (f, regs_available_for_popping);
23547 /* Process the registers we popped. */
23548 if (reg_containing_return_addr == -1)
23550 /* The return address was popped into the lowest numbered register. */
23551 regs_to_pop &= ~(1 << LR_REGNUM);
23553 reg_containing_return_addr =
23554 number_of_first_bit_set (regs_available_for_popping);
23556 /* Remove this register from the mask of available registers, so that
23557 the return address will not be corrupted by further pops. */
23558 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23561 /* If we popped other registers then handle them here. */
23562 if (regs_available_for_popping)
23564 int frame_pointer;
23566 /* Work out which register currently contains the frame pointer. */
23567 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23569 /* Move it into the correct place. */
23570 asm_fprintf (f, "\tmov\t%r, %r\n",
23571 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23573 /* (Temporarily) remove it from the mask of popped registers. */
23574 regs_available_for_popping &= ~(1 << frame_pointer);
23575 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23577 if (regs_available_for_popping)
23579 int stack_pointer;
23581 /* We popped the stack pointer as well,
23582 find the register that contains it. */
23583 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23585 /* Move it into the stack register. */
23586 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23588 /* At this point we have popped all necessary registers, so
23589 do not worry about restoring regs_available_for_popping
23590 to its correct value:
23592 assert (pops_needed == 0)
23593 assert (regs_available_for_popping == (1 << frame_pointer))
23594 assert (regs_to_pop == (1 << STACK_POINTER)) */
23596 else
23598 /* Since we have just moved the popped value into the frame
23599 pointer, the popping register is available for reuse, and
23600 we know that we still have the stack pointer left to pop. */
23601 regs_available_for_popping |= (1 << frame_pointer);
23605 /* If we still have registers left on the stack, but we no longer have
23606 any registers into which we can pop them, then we must move the return
23607 address into the link register and make available the register that
23608 contained it. */
23609 if (regs_available_for_popping == 0 && pops_needed > 0)
23611 regs_available_for_popping |= 1 << reg_containing_return_addr;
23613 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23614 reg_containing_return_addr);
23616 reg_containing_return_addr = LR_REGNUM;
23619 /* If we have registers left on the stack then pop some more.
23620 We know that at most we will want to pop FP and SP. */
23621 if (pops_needed > 0)
23623 int popped_into;
23624 int move_to;
23626 thumb_pop (f, regs_available_for_popping);
23628 /* We have popped either FP or SP.
23629 Move whichever one it is into the correct register. */
23630 popped_into = number_of_first_bit_set (regs_available_for_popping);
23631 move_to = number_of_first_bit_set (regs_to_pop);
23633 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23635 regs_to_pop &= ~(1 << move_to);
23637 --pops_needed;
23640 /* If we still have not popped everything then we must have only
23641 had one register available to us and we are now popping the SP. */
23642 if (pops_needed > 0)
23644 int popped_into;
23646 thumb_pop (f, regs_available_for_popping);
23648 popped_into = number_of_first_bit_set (regs_available_for_popping);
23650 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23652 assert (regs_to_pop == (1 << STACK_POINTER))
23653 assert (pops_needed == 1)
23657 /* If necessary restore the a4 register. */
23658 if (restore_a4)
23660 if (reg_containing_return_addr != LR_REGNUM)
23662 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23663 reg_containing_return_addr = LR_REGNUM;
23666 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23669 if (crtl->calls_eh_return)
23670 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23672 /* Return to caller. */
23673 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
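/* Editorial note (illustrative): in the simplest case, with the return
   address on the stack and no interworking, backtrace, ARM-mode entry or
   __builtin_eh_return, the early exit above collapses the whole epilogue
   tail to a single

     pop  {pc}

   The register-shuffling paths below only matter when low registers have to
   be recycled to restore FP/SP or when the return value occupies r0-r3.  */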
23676 /* Scan INSN just before assembler is output for it.
23677 For Thumb-1, we track the status of the condition codes; this
23678 information is used in the cbranchsi4_insn pattern. */
23679 void
23680 thumb1_final_prescan_insn (rtx insn)
23682 if (flag_print_asm_name)
23683 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23684 INSN_ADDRESSES (INSN_UID (insn)));
23685 /* Don't overwrite the previous setter when we get to a cbranch. */
23686 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23688 enum attr_conds conds;
23690 if (cfun->machine->thumb1_cc_insn)
23692 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23693 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23694 CC_STATUS_INIT;
23696 conds = get_attr_conds (insn);
23697 if (conds == CONDS_SET)
23699 rtx set = single_set (insn);
23700 cfun->machine->thumb1_cc_insn = insn;
23701 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23702 cfun->machine->thumb1_cc_op1 = const0_rtx;
23703 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23704 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23706 rtx src1 = XEXP (SET_SRC (set), 1);
23707 if (src1 == const0_rtx)
23708 cfun->machine->thumb1_cc_mode = CCmode;
23710 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23712 /* Record the src register operand instead of dest because
23713 cprop_hardreg pass propagates src. */
23714 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23717 else if (conds != CONDS_NOCOND)
23718 cfun->machine->thumb1_cc_insn = NULL_RTX;
23722 int
23723 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23725 unsigned HOST_WIDE_INT mask = 0xff;
23726 int i;
23728 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23729 if (val == 0) /* XXX */
23730 return 0;
23732 for (i = 0; i < 25; i++)
23733 if ((val & (mask << i)) == val)
23734 return 1;
23736 return 0;
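/* Worked example (editorial): 0x0001FE00 is 0xFF << 9, so
   thumb_shiftable_const returns 1; 0x00000101 spans bits 0 and 8, which no
   8-bit window covers, so it returns 0.  The loop bound of 25 allows shifts
   up to 0xFF << 24, the last position that keeps the field inside the low
   32 bits.  */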
23739 /* Returns nonzero if the current function contains,
23740 or might contain, a far jump. */
23741 static int
23742 thumb_far_jump_used_p (void)
23744 rtx insn;
23746 /* This test is only important for leaf functions. */
23747 /* assert (!leaf_function_p ()); */
23749 /* If we have already decided that far jumps may be used,
23750 do not bother checking again, and always return true even if
23751 it turns out that they are not being used. Once we have made
23752 the decision that far jumps are present (and that hence the link
23753 register will be pushed onto the stack) we cannot go back on it. */
23754 if (cfun->machine->far_jump_used)
23755 return 1;
23757 /* If this function is not being called from the prologue/epilogue
23758 generation code then it must be being called from the
23759 INITIAL_ELIMINATION_OFFSET macro. */
23760 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23762 /* In this case we know that we are being asked about the elimination
23763 of the arg pointer register. If that register is not being used,
23764 then there are no arguments on the stack, and we do not have to
23765 worry that a far jump might force the prologue to push the link
23766 register, changing the stack offsets. In this case we can just
23767 return false, since the presence of far jumps in the function will
23768 not affect stack offsets.
23770 If the arg pointer is live (or if it was live, but has now been
23771 eliminated and so set to dead) then we do have to test to see if
23772 the function might contain a far jump. This test can lead to some
23773 false positives, since before reload is completed the length of
23774 branch instructions is not known, so gcc defaults to returning their
23775 longest length, which in turn sets the far jump attribute to true.
23777 A false positive will not result in bad code being generated, but it
23778 will result in a needless push and pop of the link register. We
23779 hope that this does not occur too often.
23781 If we need doubleword stack alignment this could affect the other
23782 elimination offsets so we can't risk getting it wrong. */
23783 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23784 cfun->machine->arg_pointer_live = 1;
23785 else if (!cfun->machine->arg_pointer_live)
23786 return 0;
23789 /* Check to see if the function contains a branch
23790 insn with the far jump attribute set. */
23791 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23793 if (JUMP_P (insn)
23794 /* Ignore tablejump patterns. */
23795 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23796 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
23797 && get_attr_far_jump (insn) == FAR_JUMP_YES
23800 /* Record the fact that we have decided that
23801 the function does use far jumps. */
23802 cfun->machine->far_jump_used = 1;
23803 return 1;
23807 return 0;
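/* Editorial note: a "far jump" here is a Thumb-1 branch whose target is out
   of reach of the short branch encodings (roughly +/-2KB for an
   unconditional B and +/-256 bytes for a conditional branch), so it must be
   synthesised with BL; BL clobbers LR, which is why detecting one forces LR
   to be saved.  */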
23810 /* Return nonzero if FUNC must be entered in ARM mode. */
23811 int
23812 is_called_in_ARM_mode (tree func)
23814 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23816 /* Ignore the problem about functions whose address is taken. */
23817 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23818 return TRUE;
23820 #ifdef ARM_PE
23821 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23822 #else
23823 return FALSE;
23824 #endif
23827 /* Given the stack offsets and register mask in OFFSETS, decide how
23828 many additional registers to push instead of subtracting a constant
23829 from SP. For epilogues the principle is the same except we use pop.
23830 FOR_PROLOGUE indicates which we're generating. */
23831 static int
23832 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23834 HOST_WIDE_INT amount;
23835 unsigned long live_regs_mask = offsets->saved_regs_mask;
23836 /* Extract a mask of the ones we can give to the Thumb's push/pop
23837 instruction. */
23838 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23839 /* Then count how many other high registers will need to be pushed. */
23840 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23841 int n_free, reg_base, size;
23843 if (!for_prologue && frame_pointer_needed)
23844 amount = offsets->locals_base - offsets->saved_regs;
23845 else
23846 amount = offsets->outgoing_args - offsets->saved_regs;
23848 /* If the stack frame size is 512 exactly, we can save one load
23849 instruction, which should make this a win even when optimizing
23850 for speed. */
23851 if (!optimize_size && amount != 512)
23852 return 0;
23854 /* Can't do this if there are high registers to push. */
23855 if (high_regs_pushed != 0)
23856 return 0;
23858 /* Shouldn't do it in the prologue if no registers would normally
23859 be pushed at all. In the epilogue, also allow it if we'll have
23860 a pop insn for the PC. */
23861 if (l_mask == 0
23862 && (for_prologue
23863 || TARGET_BACKTRACE
23864 || (live_regs_mask & 1 << LR_REGNUM) == 0
23865 || TARGET_INTERWORK
23866 || crtl->args.pretend_args_size != 0))
23867 return 0;
23869 /* Don't do this if thumb_expand_prologue wants to emit instructions
23870 between the push and the stack frame allocation. */
23871 if (for_prologue
23872 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23873 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23874 return 0;
23876 reg_base = 0;
23877 n_free = 0;
23878 if (!for_prologue)
23880 size = arm_size_return_regs ();
23881 reg_base = ARM_NUM_INTS (size);
23882 live_regs_mask >>= reg_base;
23885 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23886 && (for_prologue || call_used_regs[reg_base + n_free]))
23888 live_regs_mask >>= 1;
23889 n_free++;
23892 if (n_free == 0)
23893 return 0;
23894 gcc_assert (amount / 4 * 4 == amount);
23896 if (amount >= 512 && (amount - n_free * 4) < 512)
23897 return (amount - 508) / 4;
23898 if (amount <= n_free * 4)
23899 return amount / 4;
23900 return 0;
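/* Worked example (editorial): with amount == 516 and two free low registers,
   amount >= 512 and amount - n_free * 4 == 508 < 512, so the function
   returns (516 - 508) / 4 == 2; pushing two extra registers shrinks the
   explicit SP adjustment to 508, which fits a single Thumb-1 "sub sp"
   immediate.  For tiny frames with amount <= 4 * n_free the whole adjustment
   becomes extra pushes.  */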
23903 /* The bits which aren't usefully expanded as rtl. */
23904 const char *
23905 thumb1_unexpanded_epilogue (void)
23907 arm_stack_offsets *offsets;
23908 int regno;
23909 unsigned long live_regs_mask = 0;
23910 int high_regs_pushed = 0;
23911 int extra_pop;
23912 int had_to_push_lr;
23913 int size;
23915 if (cfun->machine->return_used_this_function != 0)
23916 return "";
23918 if (IS_NAKED (arm_current_func_type ()))
23919 return "";
23921 offsets = arm_get_frame_offsets ();
23922 live_regs_mask = offsets->saved_regs_mask;
23923 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23925 /* We can deduce the registers used from the function's return value.
23926 This is more reliable than examining df_regs_ever_live_p () because that
23927 will be set if the register is ever used in the function, not just if
23928 the register is used to hold a return value. */
23929 size = arm_size_return_regs ();
23931 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23932 if (extra_pop > 0)
23934 unsigned long extra_mask = (1 << extra_pop) - 1;
23935 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23938 /* The prologue may have pushed some high registers to use as
23939 work registers, e.g. the testsuite file:
23940 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23941 compiles to produce:
23942 push {r4, r5, r6, r7, lr}
23943 mov r7, r9
23944 mov r6, r8
23945 push {r6, r7}
23946 as part of the prologue. We have to undo that pushing here. */
23948 if (high_regs_pushed)
23950 unsigned long mask = live_regs_mask & 0xff;
23951 int next_hi_reg;
23953 /* The available low registers depend on the size of the value we are
23954 returning. */
23955 if (size <= 12)
23956 mask |= 1 << 3;
23957 if (size <= 8)
23958 mask |= 1 << 2;
23960 if (mask == 0)
23961 /* Oh dear! We have no low registers into which we can pop
23962 high registers! */
23963 internal_error
23964 ("no low registers available for popping high registers");
23966 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23967 if (live_regs_mask & (1 << next_hi_reg))
23968 break;
23970 while (high_regs_pushed)
23972 /* Find lo register(s) into which the high register(s) can
23973 be popped. */
23974 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23976 if (mask & (1 << regno))
23977 high_regs_pushed--;
23978 if (high_regs_pushed == 0)
23979 break;
23982 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23984 /* Pop the values into the low register(s). */
23985 thumb_pop (asm_out_file, mask);
23987 /* Move the value(s) into the high registers. */
23988 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23990 if (mask & (1 << regno))
23992 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23993 regno);
23995 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23996 if (live_regs_mask & (1 << next_hi_reg))
23997 break;
24001 live_regs_mask &= ~0x0f00;
24004 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24005 live_regs_mask &= 0xff;
24007 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24009 /* Pop the return address into the PC. */
24010 if (had_to_push_lr)
24011 live_regs_mask |= 1 << PC_REGNUM;
24013 /* Either no argument registers were pushed or a backtrace
24014 structure was created which includes an adjusted stack
24015 pointer, so just pop everything. */
24016 if (live_regs_mask)
24017 thumb_pop (asm_out_file, live_regs_mask);
24019 /* We have either just popped the return address into the
24020 PC or it was kept in LR for the entire function.
24021 Note that thumb_pop has already called thumb_exit if the
24022 PC was in the list. */
24023 if (!had_to_push_lr)
24024 thumb_exit (asm_out_file, LR_REGNUM);
24026 else
24028 /* Pop everything but the return address. */
24029 if (live_regs_mask)
24030 thumb_pop (asm_out_file, live_regs_mask);
24032 if (had_to_push_lr)
24034 if (size > 12)
24036 /* We have no free low regs, so save one. */
24037 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24038 LAST_ARG_REGNUM);
24041 /* Get the return address into a temporary register. */
24042 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24044 if (size > 12)
24046 /* Move the return address to lr. */
24047 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24048 LAST_ARG_REGNUM);
24049 /* Restore the low register. */
24050 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24051 IP_REGNUM);
24052 regno = LR_REGNUM;
24054 else
24055 regno = LAST_ARG_REGNUM;
24057 else
24058 regno = LR_REGNUM;
24060 /* Remove the argument registers that were pushed onto the stack. */
24061 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24062 SP_REGNUM, SP_REGNUM,
24063 crtl->args.pretend_args_size);
24065 thumb_exit (asm_out_file, regno);
24068 return "";
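/* Editorial sketch (schematic; the register choice depends on the return
   value): for the complex-2.c prologue quoted above, the code in this
   function undoes the high-register push with something like

     pop  {r2, r3}
     mov  r8, r2
     mov  r9, r3
     pop  {r4, r5, r6, r7, pc}

   popping the saved high-register values through free low registers, moving
   them back up, and only then performing the ordinary low-register pop.  */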
24071 /* Functions to save and restore machine-specific function data. */
24072 static struct machine_function *
24073 arm_init_machine_status (void)
24075 struct machine_function *machine;
24076 machine = ggc_alloc_cleared_machine_function ();
24078 #if ARM_FT_UNKNOWN != 0
24079 machine->func_type = ARM_FT_UNKNOWN;
24080 #endif
24081 return machine;
24084 /* Return an RTX indicating where the return address to the
24085 calling function can be found. */
24086 rtx
24087 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24089 if (count != 0)
24090 return NULL_RTX;
24092 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24095 /* Do anything needed before RTL is emitted for each function. */
24096 void
24097 arm_init_expanders (void)
24099 /* Arrange to initialize and mark the machine per-function status. */
24100 init_machine_status = arm_init_machine_status;
24102 /* This is to stop the combine pass optimizing away the alignment
24103 adjustment of va_arg. */
24104 /* ??? It is claimed that this should not be necessary. */
24105 if (cfun)
24106 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24110 /* Like arm_compute_initial_elimination offset. Simpler because there
24111 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24112 to point at the base of the local variables after static stack
24113 space for a function has been allocated. */
24115 HOST_WIDE_INT
24116 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24118 arm_stack_offsets *offsets;
24120 offsets = arm_get_frame_offsets ();
24122 switch (from)
24124 case ARG_POINTER_REGNUM:
24125 switch (to)
24127 case STACK_POINTER_REGNUM:
24128 return offsets->outgoing_args - offsets->saved_args;
24130 case FRAME_POINTER_REGNUM:
24131 return offsets->soft_frame - offsets->saved_args;
24133 case ARM_HARD_FRAME_POINTER_REGNUM:
24134 return offsets->saved_regs - offsets->saved_args;
24136 case THUMB_HARD_FRAME_POINTER_REGNUM:
24137 return offsets->locals_base - offsets->saved_args;
24139 default:
24140 gcc_unreachable ();
24142 break;
24144 case FRAME_POINTER_REGNUM:
24145 switch (to)
24147 case STACK_POINTER_REGNUM:
24148 return offsets->outgoing_args - offsets->soft_frame;
24150 case ARM_HARD_FRAME_POINTER_REGNUM:
24151 return offsets->saved_regs - offsets->soft_frame;
24153 case THUMB_HARD_FRAME_POINTER_REGNUM:
24154 return offsets->locals_base - offsets->soft_frame;
24156 default:
24157 gcc_unreachable ();
24159 break;
24161 default:
24162 gcc_unreachable ();
24166 /* Generate the function's prologue. */
24168 void
24169 thumb1_expand_prologue (void)
24171 rtx insn;
24173 HOST_WIDE_INT amount;
24174 arm_stack_offsets *offsets;
24175 unsigned long func_type;
24176 int regno;
24177 unsigned long live_regs_mask;
24178 unsigned long l_mask;
24179 unsigned high_regs_pushed = 0;
24181 func_type = arm_current_func_type ();
24183 /* Naked functions don't have prologues. */
24184 if (IS_NAKED (func_type))
24185 return;
24187 if (IS_INTERRUPT (func_type))
24189 error ("interrupt service routines cannot be coded in Thumb mode");
24190 return;
24193 if (is_called_in_ARM_mode (current_function_decl))
24194 emit_insn (gen_prologue_thumb1_interwork ());
24196 offsets = arm_get_frame_offsets ();
24197 live_regs_mask = offsets->saved_regs_mask;
24199 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24200 l_mask = live_regs_mask & 0x40ff;
24201 /* Then count how many other high registers will need to be pushed. */
24202 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24204 if (crtl->args.pretend_args_size)
24206 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24208 if (cfun->machine->uses_anonymous_args)
24210 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24211 unsigned long mask;
24213 mask = 1ul << (LAST_ARG_REGNUM + 1);
24214 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24216 insn = thumb1_emit_multi_reg_push (mask, 0);
24218 else
24220 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24221 stack_pointer_rtx, x));
24223 RTX_FRAME_RELATED_P (insn) = 1;
24226 if (TARGET_BACKTRACE)
24228 HOST_WIDE_INT offset = 0;
24229 unsigned work_register;
24230 rtx work_reg, x, arm_hfp_rtx;
24232 /* We have been asked to create a stack backtrace structure.
24233 The code looks like this:
24235 0 .align 2
24236 0 func:
24237 0 sub SP, #16 Reserve space for 4 registers.
24238 2 push {R7} Push low registers.
24239 4 add R7, SP, #20 Get the stack pointer before the push.
24240 6 str R7, [SP, #8] Store the stack pointer
24241 (before reserving the space).
24242 8 mov R7, PC Get hold of the start of this code + 12.
24243 10 str R7, [SP, #16] Store it.
24244 12 mov R7, FP Get hold of the current frame pointer.
24245 14 str R7, [SP, #4] Store it.
24246 16 mov R7, LR Get hold of the current return address.
24247 18 str R7, [SP, #12] Store it.
24248 20 add R7, SP, #16 Point at the start of the
24249 backtrace structure.
24250 22 mov FP, R7 Put this value into the frame pointer. */
24252 work_register = thumb_find_work_register (live_regs_mask);
24253 work_reg = gen_rtx_REG (SImode, work_register);
24254 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24256 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24257 stack_pointer_rtx, GEN_INT (-16)));
24258 RTX_FRAME_RELATED_P (insn) = 1;
24260 if (l_mask)
24262 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24263 RTX_FRAME_RELATED_P (insn) = 1;
24265 offset = bit_count (l_mask) * UNITS_PER_WORD;
24268 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24269 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24271 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24272 x = gen_frame_mem (SImode, x);
24273 emit_move_insn (x, work_reg);
24275 /* Make sure that the instruction fetching the PC is in the right place
24276 to calculate "start of backtrace creation code + 12". */
24277 /* ??? The stores using the common WORK_REG ought to be enough to
24278 prevent the scheduler from doing anything weird. Failing that
24279 we could always move all of the following into an UNSPEC_VOLATILE. */
24280 if (l_mask)
24282 x = gen_rtx_REG (SImode, PC_REGNUM);
24283 emit_move_insn (work_reg, x);
24285 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24286 x = gen_frame_mem (SImode, x);
24287 emit_move_insn (x, work_reg);
24289 emit_move_insn (work_reg, arm_hfp_rtx);
24291 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24292 x = gen_frame_mem (SImode, x);
24293 emit_move_insn (x, work_reg);
24295 else
24297 emit_move_insn (work_reg, arm_hfp_rtx);
24299 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24300 x = gen_frame_mem (SImode, x);
24301 emit_move_insn (x, work_reg);
24303 x = gen_rtx_REG (SImode, PC_REGNUM);
24304 emit_move_insn (work_reg, x);
24306 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24307 x = gen_frame_mem (SImode, x);
24308 emit_move_insn (x, work_reg);
24311 x = gen_rtx_REG (SImode, LR_REGNUM);
24312 emit_move_insn (work_reg, x);
24314 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24315 x = gen_frame_mem (SImode, x);
24316 emit_move_insn (x, work_reg);
24318 x = GEN_INT (offset + 12);
24319 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24321 emit_move_insn (arm_hfp_rtx, work_reg);
24323 /* Optimization: If we are not pushing any low registers but we are going
24324 to push some high registers then delay our first push. This will just
24325 be a push of LR and we can combine it with the push of the first high
24326 register. */
24327 else if ((l_mask & 0xff) != 0
24328 || (high_regs_pushed == 0 && l_mask))
24330 unsigned long mask = l_mask;
24331 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24332 insn = thumb1_emit_multi_reg_push (mask, mask);
24333 RTX_FRAME_RELATED_P (insn) = 1;
24336 if (high_regs_pushed)
24338 unsigned pushable_regs;
24339 unsigned next_hi_reg;
24340 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24341 : crtl->args.info.nregs;
24342 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24344 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24345 if (live_regs_mask & (1 << next_hi_reg))
24346 break;
24348 /* Here we need to mask out registers used for passing arguments,
24349 even if they could otherwise be pushed. This is to avoid using them to stash the high
24350 registers; such a stash could clobber argument values that are still needed. */
24351 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24353 if (pushable_regs == 0)
24354 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24356 while (high_regs_pushed > 0)
24358 unsigned long real_regs_mask = 0;
24360 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24362 if (pushable_regs & (1 << regno))
24364 emit_move_insn (gen_rtx_REG (SImode, regno),
24365 gen_rtx_REG (SImode, next_hi_reg));
24367 high_regs_pushed --;
24368 real_regs_mask |= (1 << next_hi_reg);
24370 if (high_regs_pushed)
24372 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24373 next_hi_reg --)
24374 if (live_regs_mask & (1 << next_hi_reg))
24375 break;
24377 else
24379 pushable_regs &= ~((1 << regno) - 1);
24380 break;
24385 /* If we had to find a work register and we have not yet
24386 saved the LR then add it to the list of regs to push. */
24387 if (l_mask == (1 << LR_REGNUM))
24389 pushable_regs |= l_mask;
24390 real_regs_mask |= l_mask;
24391 l_mask = 0;
24394 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24395 RTX_FRAME_RELATED_P (insn) = 1;
24399 /* Load the pic register before setting the frame pointer,
24400 so we can use r7 as a temporary work register. */
24401 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24402 arm_load_pic_register (live_regs_mask);
24404 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24405 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24406 stack_pointer_rtx);
24408 if (flag_stack_usage_info)
24409 current_function_static_stack_size
24410 = offsets->outgoing_args - offsets->saved_args;
24412 amount = offsets->outgoing_args - offsets->saved_regs;
24413 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24414 if (amount)
24416 if (amount < 512)
24418 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24419 GEN_INT (- amount)));
24420 RTX_FRAME_RELATED_P (insn) = 1;
24422 else
24424 rtx reg, dwarf;
24426 /* The stack decrement is too big for an immediate value in a single
24427 insn. In theory we could issue multiple subtracts, but after
24428 three of them it becomes more space efficient to place the full
24429 value in the constant pool and load it into a register. (Also the
24430 ARM debugger really likes to see only one stack decrement per
24431 function). So instead we look for a scratch register into which
24432 we can load the decrement, and then we subtract this from the
24433 stack pointer. Unfortunately on the thumb the only available
24434 scratch registers are the argument registers, and we cannot use
24435 these as they may hold arguments to the function. Instead we
24436 attempt to locate a call preserved register which is used by this
24437 function. If we can find one, then we know that it will have
24438 been pushed at the start of the prologue and so we can corrupt
24439 it now. */
24440 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24441 if (live_regs_mask & (1 << regno))
24442 break;
24444 gcc_assert (regno <= LAST_LO_REGNUM);
24446 reg = gen_rtx_REG (SImode, regno);
24448 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24450 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24451 stack_pointer_rtx, reg));
24453 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24454 plus_constant (Pmode, stack_pointer_rtx,
24455 -amount));
24456 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24457 RTX_FRAME_RELATED_P (insn) = 1;
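/* Editorial note (illustrative): for a frame of, say, 2048 bytes this path
   picks an already-saved call-preserved low register (r4, for instance) and
   the emitted Thumb-1 sequence is along the lines of

     ldr  r4, .Lc      @ .Lc: .word -2048
     add  sp, r4

   while the REG_FRAME_RELATED_EXPR note attached above tells the unwinder
   that the net effect is simply sp = sp - 2048.  */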
24461 if (frame_pointer_needed)
24462 thumb_set_frame_pointer (offsets);
24464 /* If we are profiling, make sure no instructions are scheduled before
24465 the call to mcount. Similarly if the user has requested no
24466 scheduling in the prologue. Likewise if we want non-call exceptions
24467 using the EABI unwinder, to prevent faulting instructions from being
24468 swapped with a stack adjustment. */
24469 if (crtl->profile || !TARGET_SCHED_PROLOG
24470 || (arm_except_unwind_info (&global_options) == UI_TARGET
24471 && cfun->can_throw_non_call_exceptions))
24472 emit_insn (gen_blockage ());
24474 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24475 if (live_regs_mask & 0xff)
24476 cfun->machine->lr_save_eliminated = 0;
24479 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24480 POP instruction can be generated. LR should be replaced by PC. All
24481 the checks required are already done by USE_RETURN_INSN (). Hence,
24482 all we really need to check here is whether a single register or
24483 multiple registers need to be popped. */
24484 void
24485 thumb2_expand_return (bool simple_return)
24487 int i, num_regs;
24488 unsigned long saved_regs_mask;
24489 arm_stack_offsets *offsets;
24491 offsets = arm_get_frame_offsets ();
24492 saved_regs_mask = offsets->saved_regs_mask;
24494 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24495 if (saved_regs_mask & (1 << i))
24496 num_regs++;
24498 if (!simple_return && saved_regs_mask)
24500 if (num_regs == 1)
24502 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24503 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24504 rtx addr = gen_rtx_MEM (SImode,
24505 gen_rtx_POST_INC (SImode,
24506 stack_pointer_rtx));
24507 set_mem_alias_set (addr, get_frame_alias_set ());
24508 XVECEXP (par, 0, 0) = ret_rtx;
24509 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24510 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24511 emit_jump_insn (par);
24513 else
24515 saved_regs_mask &= ~ (1 << LR_REGNUM);
24516 saved_regs_mask |= (1 << PC_REGNUM);
24517 arm_emit_multi_reg_pop (saved_regs_mask);
24520 else
24522 emit_jump_insn (simple_return_rtx);
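/* Editorial note (illustrative): when only one register was saved, the
   PARALLEL built above folds the return into a single post-incremented load
   of the PC from the stack, i.e. a "pop {pc}"; with several saved registers,
   LR is swapped for PC in saved_regs_mask so that arm_emit_multi_reg_pop
   produces e.g. "pop {r4, r5, pc}".  */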
24526 void
24527 thumb1_expand_epilogue (void)
24529 HOST_WIDE_INT amount;
24530 arm_stack_offsets *offsets;
24531 int regno;
24533 /* Naked functions don't have epilogues. */
24534 if (IS_NAKED (arm_current_func_type ()))
24535 return;
24537 offsets = arm_get_frame_offsets ();
24538 amount = offsets->outgoing_args - offsets->saved_regs;
24540 if (frame_pointer_needed)
24542 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24543 amount = offsets->locals_base - offsets->saved_regs;
24545 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24547 gcc_assert (amount >= 0);
24548 if (amount)
24550 emit_insn (gen_blockage ());
24552 if (amount < 512)
24553 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24554 GEN_INT (amount)));
24555 else
24557 /* r3 is always free in the epilogue. */
24558 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24560 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24561 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24565 /* Emit a USE (stack_pointer_rtx), so that
24566 the stack adjustment will not be deleted. */
24567 emit_insn (gen_force_register_use (stack_pointer_rtx));
24569 if (crtl->profile || !TARGET_SCHED_PROLOG)
24570 emit_insn (gen_blockage ());
24572 /* Emit a clobber for each register that will be restored in the epilogue,
24573 so that flow2 will get register lifetimes correct. */
24574 for (regno = 0; regno < 13; regno++)
24575 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24576 emit_clobber (gen_rtx_REG (SImode, regno));
24578 if (! df_regs_ever_live_p (LR_REGNUM))
24579 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24582 /* Epilogue code for APCS frame. */
24583 static void
24584 arm_expand_epilogue_apcs_frame (bool really_return)
24586 unsigned long func_type;
24587 unsigned long saved_regs_mask;
24588 int num_regs = 0;
24589 int i;
24590 int floats_from_frame = 0;
24591 arm_stack_offsets *offsets;
24593 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24594 func_type = arm_current_func_type ();
24596 /* Get frame offsets for ARM. */
24597 offsets = arm_get_frame_offsets ();
24598 saved_regs_mask = offsets->saved_regs_mask;
24600 /* Find the offset of the floating-point save area in the frame. */
24601 floats_from_frame = offsets->saved_args - offsets->frame;
24603 /* Compute how many core registers are saved and how far away the floats are. */
24604 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24605 if (saved_regs_mask & (1 << i))
24607 num_regs++;
24608 floats_from_frame += 4;
24611 if (TARGET_HARD_FLOAT && TARGET_VFP)
24613 int start_reg;
24614 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24616 /* The offset is from IP_REGNUM. */
24617 int saved_size = arm_get_vfp_saved_size ();
24618 if (saved_size > 0)
24620 rtx insn;
24621 floats_from_frame += saved_size;
24622 insn = emit_insn (gen_addsi3 (ip_rtx,
24623 hard_frame_pointer_rtx,
24624 GEN_INT (-floats_from_frame)));
24625 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24626 ip_rtx, hard_frame_pointer_rtx);
24629 /* Generate VFP register multi-pop. */
24630 start_reg = FIRST_VFP_REGNUM;
24632 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24633 /* Look for a case where a reg does not need restoring. */
24634 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24635 && (!df_regs_ever_live_p (i + 1)
24636 || call_used_regs[i + 1]))
24638 if (start_reg != i)
24639 arm_emit_vfp_multi_reg_pop (start_reg,
24640 (i - start_reg) / 2,
24641 gen_rtx_REG (SImode,
24642 IP_REGNUM));
24643 start_reg = i + 2;
24646 /* Restore the remaining regs that we have discovered (or possibly
24647 even all of them, if the conditional in the for loop never
24648 fired). */
24649 if (start_reg != i)
24650 arm_emit_vfp_multi_reg_pop (start_reg,
24651 (i - start_reg) / 2,
24652 gen_rtx_REG (SImode, IP_REGNUM));
24655 if (TARGET_IWMMXT)
24657 /* The frame pointer is guaranteed to be non-double-word aligned, as
24658 it is set to double-word-aligned old_stack_pointer - 4. */
24659 rtx insn;
24660 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24662 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24663 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24665 rtx addr = gen_frame_mem (V2SImode,
24666 plus_constant (Pmode, hard_frame_pointer_rtx,
24667 - lrm_count * 4));
24668 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24669 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24670 gen_rtx_REG (V2SImode, i),
24671 NULL_RTX);
24672 lrm_count += 2;
24676 /* saved_regs_mask should contain IP, which holds the old stack pointer
24677 from the time the activation record was created. Since SP and IP are adjacent registers,
24678 we can restore the value directly into SP. */
24679 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24680 saved_regs_mask &= ~(1 << IP_REGNUM);
24681 saved_regs_mask |= (1 << SP_REGNUM);
24683 /* There are two registers left in saved_regs_mask - LR and PC. We
24684 only need to restore LR (the return address), but to
24685 save time we can load it directly into PC, unless we need a
24686 special function exit sequence, or we are not really returning. */
24687 if (really_return
24688 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24689 && !crtl->calls_eh_return)
24690 /* Delete LR from the register mask, so that the LR value saved on
24691 the stack is loaded into the PC instead. */
24692 saved_regs_mask &= ~(1 << LR_REGNUM);
24693 else
24694 saved_regs_mask &= ~(1 << PC_REGNUM);
24696 num_regs = bit_count (saved_regs_mask);
24697 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24699 rtx insn;
24700 emit_insn (gen_blockage ());
24701 /* Unwind the stack to just below the saved registers. */
24702 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24703 hard_frame_pointer_rtx,
24704 GEN_INT (- 4 * num_regs)));
24706 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24707 stack_pointer_rtx, hard_frame_pointer_rtx);
24710 arm_emit_multi_reg_pop (saved_regs_mask);
24712 if (IS_INTERRUPT (func_type))
24714 /* Interrupt handlers will have pushed the
24715 IP onto the stack, so restore it now. */
24716 rtx insn;
24717 rtx addr = gen_rtx_MEM (SImode,
24718 gen_rtx_POST_INC (SImode,
24719 stack_pointer_rtx));
24720 set_mem_alias_set (addr, get_frame_alias_set ());
24721 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24722 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24723 gen_rtx_REG (SImode, IP_REGNUM),
24724 NULL_RTX);
24727 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24728 return;
24730 if (crtl->calls_eh_return)
24731 emit_insn (gen_addsi3 (stack_pointer_rtx,
24732 stack_pointer_rtx,
24733 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24735 if (IS_STACKALIGN (func_type))
24736 /* Restore the original stack pointer. Before the prologue, the stack was
24737 realigned and the original stack pointer was saved in r0. For details,
24738 see the comment in arm_expand_prologue. */
24739 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24741 emit_jump_insn (simple_return_rtx);
24744 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24745 function is not a sibcall. */
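/* Broadly, the epilogue proceeds as follows: bring the stack pointer back
   up to the last saved register (via the frame pointer when one is in use),
   pop the VFP and iWMMXt save areas, pop the core registers (possibly
   loading the saved LR straight into the PC), pop any pretend (anonymous)
   argument area, and finally emit the return unless the PC was already
   loaded from the stack.  */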
24746 void
24747 arm_expand_epilogue (bool really_return)
24749 unsigned long func_type;
24750 unsigned long saved_regs_mask;
24751 int num_regs = 0;
24752 int i;
24753 int amount;
24754 arm_stack_offsets *offsets;
24756 func_type = arm_current_func_type ();
24758 /* Naked functions don't have an epilogue. Hence, generate the return pattern, and
24759 let output_return_instruction take care of any instruction emission. */
24760 if (IS_NAKED (func_type)
24761 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24763 if (really_return)
24764 emit_jump_insn (simple_return_rtx);
24765 return;
24768 /* If we are throwing an exception, then we really must be doing a
24769 return, so we can't tail-call. */
24770 gcc_assert (!crtl->calls_eh_return || really_return);
24772 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24774 arm_expand_epilogue_apcs_frame (really_return);
24775 return;
24778 /* Get frame offsets for ARM. */
24779 offsets = arm_get_frame_offsets ();
24780 saved_regs_mask = offsets->saved_regs_mask;
24781 num_regs = bit_count (saved_regs_mask);
24783 if (frame_pointer_needed)
24785 rtx insn;
24786 /* Restore stack pointer if necessary. */
24787 if (TARGET_ARM)
24789 /* In ARM mode, the frame pointer points to the first saved register.
24790 Restore the stack pointer to the last saved register. */
24791 amount = offsets->frame - offsets->saved_regs;
24793 /* Force out any pending memory operations that reference stacked data
24794 before stack de-allocation occurs. */
24795 emit_insn (gen_blockage ());
24796 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24797 hard_frame_pointer_rtx,
24798 GEN_INT (amount)));
24799 arm_add_cfa_adjust_cfa_note (insn, amount,
24800 stack_pointer_rtx,
24801 hard_frame_pointer_rtx);
24803 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24804 deleted. */
24805 emit_insn (gen_force_register_use (stack_pointer_rtx));
24807 else
24809 /* In Thumb-2 mode, the frame pointer points to the last saved
24810 register. */
24811 amount = offsets->locals_base - offsets->saved_regs;
24812 if (amount)
24814 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24815 hard_frame_pointer_rtx,
24816 GEN_INT (amount)));
24817 arm_add_cfa_adjust_cfa_note (insn, amount,
24818 hard_frame_pointer_rtx,
24819 hard_frame_pointer_rtx);
24822 /* Force out any pending memory operations that reference stacked data
24823 before stack de-allocation occurs. */
24824 emit_insn (gen_blockage ());
24825 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24826 hard_frame_pointer_rtx));
24827 arm_add_cfa_adjust_cfa_note (insn, 0,
24828 stack_pointer_rtx,
24829 hard_frame_pointer_rtx);
24830 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24831 deleted. */
24832 emit_insn (gen_force_register_use (stack_pointer_rtx));
24835 else
24837 /* Pop off outgoing args and local frame to adjust stack pointer to
24838 last saved register. */
24839 amount = offsets->outgoing_args - offsets->saved_regs;
24840 if (amount)
24842 rtx tmp;
24843 /* Force out any pending memory operations that reference stacked data
24844 before stack de-allocation occurs. */
24845 emit_insn (gen_blockage ());
24846 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24847 stack_pointer_rtx,
24848 GEN_INT (amount)));
24849 arm_add_cfa_adjust_cfa_note (tmp, amount,
24850 stack_pointer_rtx, stack_pointer_rtx);
24851 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24852 not deleted. */
24853 emit_insn (gen_force_register_use (stack_pointer_rtx));
24857 if (TARGET_HARD_FLOAT && TARGET_VFP)
24859 /* Generate VFP register multi-pop. */
24860 int end_reg = LAST_VFP_REGNUM + 1;
24862 /* Scan the registers in reverse order. We need to match
24863 any groupings made in the prologue and generate matching
24864 vldm operations. The groups must match because, unlike
24865 pop, vldm can only handle consecutive registers. */
24866 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24867 /* Look for a case where a reg does not need restoring. */
24868 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24869 && (!df_regs_ever_live_p (i + 1)
24870 || call_used_regs[i + 1]))
24872 /* Restore the regs discovered so far (from reg+2 to
24873 end_reg). */
24874 if (end_reg > i + 2)
24875 arm_emit_vfp_multi_reg_pop (i + 2,
24876 (end_reg - (i + 2)) / 2,
24877 stack_pointer_rtx);
24878 end_reg = i;
24881 /* Restore the remaining regs that we have discovered (or possibly
24882 even all of them, if the conditional in the for loop never
24883 fired). */
24884 if (end_reg > i + 2)
24885 arm_emit_vfp_multi_reg_pop (i + 2,
24886 (end_reg - (i + 2)) / 2,
24887 stack_pointer_rtx);
24890 if (TARGET_IWMMXT)
24891 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24892 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24894 rtx insn;
24895 rtx addr = gen_rtx_MEM (V2SImode,
24896 gen_rtx_POST_INC (SImode,
24897 stack_pointer_rtx));
24898 set_mem_alias_set (addr, get_frame_alias_set ());
24899 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24900 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24901 gen_rtx_REG (V2SImode, i),
24902 NULL_RTX);
24903 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24904 stack_pointer_rtx, stack_pointer_rtx);
24907 if (saved_regs_mask)
24909 rtx insn;
24910 bool return_in_pc = false;
24912 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24913 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24914 && !IS_STACKALIGN (func_type)
24915 && really_return
24916 && crtl->args.pretend_args_size == 0
24917 && saved_regs_mask & (1 << LR_REGNUM)
24918 && !crtl->calls_eh_return)
24920 saved_regs_mask &= ~(1 << LR_REGNUM);
24921 saved_regs_mask |= (1 << PC_REGNUM);
24922 return_in_pc = true;
24925 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24927 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24928 if (saved_regs_mask & (1 << i))
24930 rtx addr = gen_rtx_MEM (SImode,
24931 gen_rtx_POST_INC (SImode,
24932 stack_pointer_rtx));
24933 set_mem_alias_set (addr, get_frame_alias_set ());
24935 if (i == PC_REGNUM)
24937 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24938 XVECEXP (insn, 0, 0) = ret_rtx;
24939 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
24940 gen_rtx_REG (SImode, i),
24941 addr);
24942 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24943 insn = emit_jump_insn (insn);
24945 else
24947 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24948 addr));
24949 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24950 gen_rtx_REG (SImode, i),
24951 NULL_RTX);
24952 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24953 stack_pointer_rtx,
24954 stack_pointer_rtx);
24958 else
24960 if (TARGET_LDRD
24961 && current_tune->prefer_ldrd_strd
24962 && !optimize_function_for_size_p (cfun))
24964 if (TARGET_THUMB2)
24965 thumb2_emit_ldrd_pop (saved_regs_mask);
24966 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24967 arm_emit_ldrd_pop (saved_regs_mask);
24968 else
24969 arm_emit_multi_reg_pop (saved_regs_mask);
24971 else
24972 arm_emit_multi_reg_pop (saved_regs_mask);
24975 if (return_in_pc == true)
24976 return;
24979 if (crtl->args.pretend_args_size)
24981 int i, j;
24982 rtx dwarf = NULL_RTX;
24983 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24984 stack_pointer_rtx,
24985 GEN_INT (crtl->args.pretend_args_size)));
24987 RTX_FRAME_RELATED_P (tmp) = 1;
24989 if (cfun->machine->uses_anonymous_args)
24991 /* Restore pretend args. Refer to arm_expand_prologue for how the
24992 pretend args are saved on the stack. */
24993 int num_regs = crtl->args.pretend_args_size / 4;
24994 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
24995 for (j = 0, i = 0; j < num_regs; i++)
24996 if (saved_regs_mask & (1 << i))
24998 rtx reg = gen_rtx_REG (SImode, i);
24999 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25000 j++;
25002 REG_NOTES (tmp) = dwarf;
25004 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25005 stack_pointer_rtx, stack_pointer_rtx);
25008 if (!really_return)
25009 return;
25011 if (crtl->calls_eh_return)
25012 emit_insn (gen_addsi3 (stack_pointer_rtx,
25013 stack_pointer_rtx,
25014 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25016 if (IS_STACKALIGN (func_type))
25017 /* Restore the original stack pointer. Before the prologue, the stack was
25018 realigned and the original stack pointer was saved in r0. For details,
25019 see the comment in arm_expand_prologue. */
25020 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25022 emit_jump_insn (simple_return_rtx);
25025 /* Implementation of insn prologue_thumb1_interwork. This is the first
25026 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25028 const char *
25029 thumb1_output_interwork (void)
25031 const char * name;
25032 FILE *f = asm_out_file;
25034 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25035 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25036 == SYMBOL_REF);
25037 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25039 /* Generate code sequence to switch us into Thumb mode. */
25040 /* The .code 32 directive has already been emitted by
25041 ASM_DECLARE_FUNCTION_NAME. */
25042 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25043 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25045 /* Generate a label, so that the debugger will notice the
25046 change in instruction sets. This label is also used by
25047 the assembler to bypass the ARM code when this function
25048 is called from a Thumb encoded function elsewhere in the
25049 same file. Hence the definition of STUB_NAME here must
25050 agree with the definition in gas/config/tc-arm.c. */
25052 #define STUB_NAME ".real_start_of"
25054 fprintf (f, "\t.code\t16\n");
25055 #ifdef ARM_PE
25056 if (arm_dllexport_name_p (name))
25057 name = arm_strip_name_encoding (name);
25058 #endif
25059 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25060 fprintf (f, "\t.thumb_func\n");
25061 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25063 return "";
25066 /* Handle the case of a double word load into a low register from
25067 a computed memory address. The computed address may involve a
25068 register which is overwritten by the load. */
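/* For example, for an address of the form <reg> + <reg> (say r1 + r2)
   with destination pair r3:r4, the emitted sequence is roughly

	add	r4, r1, r2
	ldr	r3, [r4, #0]
	ldr	r4, [r4, #4]

   so the register holding the computed address is only overwritten by
   the final load.  */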
25069 const char *
25070 thumb_load_double_from_address (rtx *operands)
25072 rtx addr;
25073 rtx base;
25074 rtx offset;
25075 rtx arg1;
25076 rtx arg2;
25078 gcc_assert (REG_P (operands[0]));
25079 gcc_assert (MEM_P (operands[1]));
25081 /* Get the memory address. */
25082 addr = XEXP (operands[1], 0);
25084 /* Work out how the memory address is computed. */
25085 switch (GET_CODE (addr))
25087 case REG:
25088 operands[2] = adjust_address (operands[1], SImode, 4);
25090 if (REGNO (operands[0]) == REGNO (addr))
25092 output_asm_insn ("ldr\t%H0, %2", operands);
25093 output_asm_insn ("ldr\t%0, %1", operands);
25095 else
25097 output_asm_insn ("ldr\t%0, %1", operands);
25098 output_asm_insn ("ldr\t%H0, %2", operands);
25100 break;
25102 case CONST:
25103 /* Compute <address> + 4 for the high order load. */
25104 operands[2] = adjust_address (operands[1], SImode, 4);
25106 output_asm_insn ("ldr\t%0, %1", operands);
25107 output_asm_insn ("ldr\t%H0, %2", operands);
25108 break;
25110 case PLUS:
25111 arg1 = XEXP (addr, 0);
25112 arg2 = XEXP (addr, 1);
25114 if (CONSTANT_P (arg1))
25115 base = arg2, offset = arg1;
25116 else
25117 base = arg1, offset = arg2;
25119 gcc_assert (REG_P (base));
25121 /* Catch the case of <address> = <reg> + <reg> */
25122 if (REG_P (offset))
25124 int reg_offset = REGNO (offset);
25125 int reg_base = REGNO (base);
25126 int reg_dest = REGNO (operands[0]);
25128 /* Add the base and offset registers together into the
25129 higher destination register. */
25130 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25131 reg_dest + 1, reg_base, reg_offset);
25133 /* Load the lower destination register from the address in
25134 the higher destination register. */
25135 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25136 reg_dest, reg_dest + 1);
25138 /* Load the higher destination register from its own address
25139 plus 4. */
25140 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25141 reg_dest + 1, reg_dest + 1);
25143 else
25145 /* Compute <address> + 4 for the high order load. */
25146 operands[2] = adjust_address (operands[1], SImode, 4);
25148 /* If the computed address is held in the low order register
25149 then load the high order register first, otherwise always
25150 load the low order register first. */
25151 if (REGNO (operands[0]) == REGNO (base))
25153 output_asm_insn ("ldr\t%H0, %2", operands);
25154 output_asm_insn ("ldr\t%0, %1", operands);
25156 else
25158 output_asm_insn ("ldr\t%0, %1", operands);
25159 output_asm_insn ("ldr\t%H0, %2", operands);
25162 break;
25164 case LABEL_REF:
25165 /* With no registers to worry about we can just load the value
25166 directly. */
25167 operands[2] = adjust_address (operands[1], SImode, 4);
25169 output_asm_insn ("ldr\t%H0, %2", operands);
25170 output_asm_insn ("ldr\t%0, %1", operands);
25171 break;
25173 default:
25174 gcc_unreachable ();
25177 return "";
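/* Output a Thumb block move of N (2 or 3) words using post-incrementing
   ldmia/stmia.  The scratch registers in operands[4..6] are first sorted
   into ascending order, since ldmia/stmia require the register list to
   be in increasing register number.  */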
25180 const char *
25181 thumb_output_move_mem_multiple (int n, rtx *operands)
25183 rtx tmp;
25185 switch (n)
25187 case 2:
25188 if (REGNO (operands[4]) > REGNO (operands[5]))
25190 tmp = operands[4];
25191 operands[4] = operands[5];
25192 operands[5] = tmp;
25194 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25195 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25196 break;
25198 case 3:
25199 if (REGNO (operands[4]) > REGNO (operands[5]))
25201 tmp = operands[4];
25202 operands[4] = operands[5];
25203 operands[5] = tmp;
25205 if (REGNO (operands[5]) > REGNO (operands[6]))
25207 tmp = operands[5];
25208 operands[5] = operands[6];
25209 operands[6] = tmp;
25211 if (REGNO (operands[4]) > REGNO (operands[5]))
25213 tmp = operands[4];
25214 operands[4] = operands[5];
25215 operands[5] = tmp;
25218 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25219 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25220 break;
25222 default:
25223 gcc_unreachable ();
25226 return "";
25229 /* Output a call-via instruction for thumb state. */
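/* The call is emitted as "bl <label>", where <label> names a one-instruction
   trampoline containing "bx <reg>".  The trampolines themselves are emitted
   later: once per compilation unit in arm_file_end when we are in the normal
   text section, or once per function when function sections are in use.  */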
25230 const char *
25231 thumb_call_via_reg (rtx reg)
25233 int regno = REGNO (reg);
25234 rtx *labelp;
25236 gcc_assert (regno < LR_REGNUM);
25238 /* If we are in the normal text section we can use a single instance
25239 per compilation unit. If we are doing function sections, then we need
25240 an entry per section, since we can't rely on reachability. */
25241 if (in_section == text_section)
25243 thumb_call_reg_needed = 1;
25245 if (thumb_call_via_label[regno] == NULL)
25246 thumb_call_via_label[regno] = gen_label_rtx ();
25247 labelp = thumb_call_via_label + regno;
25249 else
25251 if (cfun->machine->call_via[regno] == NULL)
25252 cfun->machine->call_via[regno] = gen_label_rtx ();
25253 labelp = cfun->machine->call_via + regno;
25256 output_asm_insn ("bl\t%a0", labelp);
25257 return "";
25260 /* Routines for generating rtl. */
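/* thumb_expand_movmemqi expands a constant-length block copy into 12-byte
   and 8-byte block moves followed by word, halfword and byte copies for the
   remainder.  For example, a 15-byte copy becomes one 12-byte block move
   followed by a halfword copy and a byte copy.  */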
25261 void
25262 thumb_expand_movmemqi (rtx *operands)
25264 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25265 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25266 HOST_WIDE_INT len = INTVAL (operands[2]);
25267 HOST_WIDE_INT offset = 0;
25269 while (len >= 12)
25271 emit_insn (gen_movmem12b (out, in, out, in));
25272 len -= 12;
25275 if (len >= 8)
25277 emit_insn (gen_movmem8b (out, in, out, in));
25278 len -= 8;
25281 if (len >= 4)
25283 rtx reg = gen_reg_rtx (SImode);
25284 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25285 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25286 len -= 4;
25287 offset += 4;
25290 if (len >= 2)
25292 rtx reg = gen_reg_rtx (HImode);
25293 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25294 plus_constant (Pmode, in,
25295 offset))));
25296 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25297 offset)),
25298 reg));
25299 len -= 2;
25300 offset += 2;
25303 if (len)
25305 rtx reg = gen_reg_rtx (QImode);
25306 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25307 plus_constant (Pmode, in,
25308 offset))));
25309 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25310 offset)),
25311 reg));
25315 void
25316 thumb_reload_out_hi (rtx *operands)
25318 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25321 /* Handle reading a half-word from memory during reload. */
25322 void
25323 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25325 gcc_unreachable ();
25328 /* Return the length of a function name prefix
25329 that starts with the character 'c'. */
25330 static int
25331 arm_get_strip_length (int c)
25333 switch (c)
25335 ARM_NAME_ENCODING_LENGTHS
25336 default: return 0;
25340 /* Return a pointer to a function's name with any
25341 and all prefix encodings stripped from it. */
25342 const char *
25343 arm_strip_name_encoding (const char *name)
25345 int skip;
25347 while ((skip = arm_get_strip_length (* name)))
25348 name += skip;
25350 return name;
25353 /* If there is a '*' anywhere in the name's prefix, then
25354 emit the stripped name verbatim, otherwise prepend an
25355 underscore if leading underscores are being used. */
25356 void
25357 arm_asm_output_labelref (FILE *stream, const char *name)
25359 int skip;
25360 int verbatim = 0;
25362 while ((skip = arm_get_strip_length (* name)))
25364 verbatim |= (*name == '*');
25365 name += skip;
25368 if (verbatim)
25369 fputs (name, stream);
25370 else
25371 asm_fprintf (stream, "%U%s", name);
25374 /* This function is used to emit an EABI tag and its associated value.
25375 We emit the numerical value of the tag in case the assembler does not
25376 support textual tags (e.g. gas prior to 2.20). If requested we include
25377 the tag name in a comment so that anyone reading the assembler output
25378 will know which tag is being set.
25380 This function is not static because arm-c.c needs it too. */
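/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) emits

	.eabi_attribute 26, 1	@ Tag_ABI_enum_size

   where the trailing comment is only present with -fverbose-asm or -dA.  */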
25382 void
25383 arm_emit_eabi_attribute (const char *name, int num, int val)
25385 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25386 if (flag_verbose_asm || flag_debug_asm)
25387 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25388 asm_fprintf (asm_out_file, "\n");
25391 static void
25392 arm_file_start (void)
25394 int val;
25396 if (TARGET_UNIFIED_ASM)
25397 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25399 if (TARGET_BPABI)
25401 const char *fpu_name;
25402 if (arm_selected_arch)
25404 const char* pos = strchr (arm_selected_arch->name, '+');
25405 if (pos)
25407 char buf[15];
25408 gcc_assert (strlen (arm_selected_arch->name)
25409 <= sizeof (buf) / sizeof (*pos));
25410 strncpy (buf, arm_selected_arch->name,
25411 (pos - arm_selected_arch->name) * sizeof (*pos));
25412 buf[pos - arm_selected_arch->name] = '\0';
25413 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25414 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25416 else
25417 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25419 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25420 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25421 else
25422 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
25424 if (TARGET_SOFT_FLOAT)
25426 fpu_name = "softvfp";
25428 else
25430 fpu_name = arm_fpu_desc->name;
25431 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25433 if (TARGET_HARD_FLOAT)
25434 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25435 if (TARGET_HARD_FLOAT_ABI)
25436 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25439 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25441 /* Some of these attributes only apply when the corresponding features
25442 are used. However we don't have any easy way of figuring this out.
25443 Conservatively record the setting that would have been used. */
25445 if (flag_rounding_math)
25446 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25448 if (!flag_unsafe_math_optimizations)
25450 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25451 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25453 if (flag_signaling_nans)
25454 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25456 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25457 flag_finite_math_only ? 1 : 3);
25459 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25460 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25461 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25462 flag_short_enums ? 1 : 2);
25464 /* Tag_ABI_optimization_goals. */
25465 if (optimize_size)
25466 val = 4;
25467 else if (optimize >= 2)
25468 val = 2;
25469 else if (optimize)
25470 val = 1;
25471 else
25472 val = 6;
25473 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25475 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25476 unaligned_access);
25478 if (arm_fp16_format)
25479 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25480 (int) arm_fp16_format);
25482 if (arm_lang_output_object_attributes_hook)
25483 arm_lang_output_object_attributes_hook();
25486 default_file_start ();
25489 static void
25490 arm_file_end (void)
25492 int regno;
25494 if (NEED_INDICATE_EXEC_STACK)
25495 /* Add .note.GNU-stack. */
25496 file_end_indicate_exec_stack ();
25498 if (! thumb_call_reg_needed)
25499 return;
25501 switch_to_section (text_section);
25502 asm_fprintf (asm_out_file, "\t.code 16\n");
25503 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25505 for (regno = 0; regno < LR_REGNUM; regno++)
25507 rtx label = thumb_call_via_label[regno];
25509 if (label != 0)
25511 targetm.asm_out.internal_label (asm_out_file, "L",
25512 CODE_LABEL_NUMBER (label));
25513 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25518 #ifndef ARM_PE
25519 /* Symbols in the text segment can be accessed without indirecting via the
25520 constant pool; it may take an extra binary operation, but this is still
25521 faster than indirecting via memory. Don't do this when not optimizing,
25522 since we won't be calculating all of the offsets necessary to do this
25523 simplification. */
25525 static void
25526 arm_encode_section_info (tree decl, rtx rtl, int first)
25528 if (optimize > 0 && TREE_CONSTANT (decl))
25529 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25531 default_encode_section_info (decl, rtl, first);
25533 #endif /* !ARM_PE */
25535 static void
25536 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25538 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25539 && !strcmp (prefix, "L"))
25541 arm_ccfsm_state = 0;
25542 arm_target_insn = NULL;
25544 default_internal_label (stream, prefix, labelno);
25547 /* Output code to add DELTA to the first argument, and then jump
25548 to FUNCTION. Used for C++ multiple inheritance. */
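/* In ARM or Thumb-2 state the delta is applied with a short sequence of
   add/sub immediates taken 8 bits at a time; for example a delta of 291
   is split into an "add" of 35 followed by an "add" of 256.  In Thumb-1
   state the target address (and, on Thumb-1-only targets with deltas
   above 255, the delta itself) is instead loaded from a literal word
   emitted after the thunk body.  */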
25549 static void
25550 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25551 HOST_WIDE_INT delta,
25552 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25553 tree function)
25555 static int thunk_label = 0;
25556 char label[256];
25557 char labelpc[256];
25558 int mi_delta = delta;
25559 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25560 int shift = 0;
25561 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25562 ? 1 : 0);
25563 if (mi_delta < 0)
25564 mi_delta = - mi_delta;
25566 final_start_function (emit_barrier (), file, 1);
25568 if (TARGET_THUMB1)
25570 int labelno = thunk_label++;
25571 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25572 /* Thunks are entered in ARM mode when available. */
25573 if (TARGET_THUMB1_ONLY)
25575 /* push r3 so we can use it as a temporary. */
25576 /* TODO: Omit this save if r3 is not used. */
25577 fputs ("\tpush {r3}\n", file);
25578 fputs ("\tldr\tr3, ", file);
25580 else
25582 fputs ("\tldr\tr12, ", file);
25584 assemble_name (file, label);
25585 fputc ('\n', file);
25586 if (flag_pic)
25588 /* If we are generating PIC, the ldr instruction below loads
25589 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25590 the address of the add + 8, so we have:
25592 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25593 = target + 1.
25595 Note that we have "+ 1" because some versions of GNU ld
25596 don't set the low bit of the result for R_ARM_REL32
25597 relocations against thumb function symbols.
25598 On ARMv6M this is +4, not +8. */
25599 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25600 assemble_name (file, labelpc);
25601 fputs (":\n", file);
25602 if (TARGET_THUMB1_ONLY)
25604 /* This is 2 insns after the start of the thunk, so we know it
25605 is 4-byte aligned. */
25606 fputs ("\tadd\tr3, pc, r3\n", file);
25607 fputs ("\tmov r12, r3\n", file);
25609 else
25610 fputs ("\tadd\tr12, pc, r12\n", file);
25612 else if (TARGET_THUMB1_ONLY)
25613 fputs ("\tmov r12, r3\n", file);
25615 if (TARGET_THUMB1_ONLY)
25617 if (mi_delta > 255)
25619 fputs ("\tldr\tr3, ", file);
25620 assemble_name (file, label);
25621 fputs ("+4\n", file);
25622 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
25623 mi_op, this_regno, this_regno);
25625 else if (mi_delta != 0)
25627 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25628 mi_op, this_regno, this_regno,
25629 mi_delta);
25632 else
25634 /* TODO: Use movw/movt for large constants when available. */
25635 while (mi_delta != 0)
25637 if ((mi_delta & (3 << shift)) == 0)
25638 shift += 2;
25639 else
25641 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25642 mi_op, this_regno, this_regno,
25643 mi_delta & (0xff << shift));
25644 mi_delta &= ~(0xff << shift);
25645 shift += 8;
25649 if (TARGET_THUMB1)
25651 if (TARGET_THUMB1_ONLY)
25652 fputs ("\tpop\t{r3}\n", file);
25654 fprintf (file, "\tbx\tr12\n");
25655 ASM_OUTPUT_ALIGN (file, 2);
25656 assemble_name (file, label);
25657 fputs (":\n", file);
25658 if (flag_pic)
25660 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25661 rtx tem = XEXP (DECL_RTL (function), 0);
25662 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25663 pipeline offset is four rather than eight. Adjust the offset
25664 accordingly. */
25665 tem = plus_constant (GET_MODE (tem), tem,
25666 TARGET_THUMB1_ONLY ? -3 : -7);
25667 tem = gen_rtx_MINUS (GET_MODE (tem),
25668 tem,
25669 gen_rtx_SYMBOL_REF (Pmode,
25670 ggc_strdup (labelpc)));
25671 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25673 else
25674 /* Output ".word .LTHUNKn". */
25675 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25677 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25678 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25680 else
25682 fputs ("\tb\t", file);
25683 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25684 if (NEED_PLT_RELOC)
25685 fputs ("(PLT)", file);
25686 fputc ('\n', file);
25689 final_end_function ();
25693 arm_emit_vector_const (FILE *file, rtx x)
25695 int i;
25696 const char * pattern;
25698 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25700 switch (GET_MODE (x))
25702 case V2SImode: pattern = "%08x"; break;
25703 case V4HImode: pattern = "%04x"; break;
25704 case V8QImode: pattern = "%02x"; break;
25705 default: gcc_unreachable ();
25708 fprintf (file, "0x");
25709 for (i = CONST_VECTOR_NUNITS (x); i--;)
25711 rtx element;
25713 element = CONST_VECTOR_ELT (x, i);
25714 fprintf (file, pattern, INTVAL (element));
25717 return 1;
25720 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25721 HFmode constant pool entries are actually loaded with ldr. */
25722 void
25723 arm_emit_fp16_const (rtx c)
25725 REAL_VALUE_TYPE r;
25726 long bits;
25728 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25729 bits = real_to_target (NULL, &r, HFmode);
25730 if (WORDS_BIG_ENDIAN)
25731 assemble_zeros (2);
25732 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25733 if (!WORDS_BIG_ENDIAN)
25734 assemble_zeros (2);
25737 const char *
25738 arm_output_load_gr (rtx *operands)
25740 rtx reg;
25741 rtx offset;
25742 rtx wcgr;
25743 rtx sum;
25745 if (!MEM_P (operands [1])
25746 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25747 || !REG_P (reg = XEXP (sum, 0))
25748 || !CONST_INT_P (offset = XEXP (sum, 1))
25749 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25750 return "wldrw%?\t%0, %1";
25752 /* Fix up an out-of-range load of a GR register. */
25753 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25754 wcgr = operands[0];
25755 operands[0] = reg;
25756 output_asm_insn ("ldr%?\t%0, %1", operands);
25758 operands[0] = wcgr;
25759 operands[1] = reg;
25760 output_asm_insn ("tmcr%?\t%0, %1", operands);
25761 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25763 return "";
25766 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25768 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25769 named arg and all anonymous args onto the stack.
25770 XXX I know the prologue shouldn't be pushing registers, but it is faster
25771 that way. */
25773 static void
25774 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25775 enum machine_mode mode,
25776 tree type,
25777 int *pretend_size,
25778 int second_time ATTRIBUTE_UNUSED)
25780 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25781 int nregs;
25783 cfun->machine->uses_anonymous_args = 1;
25784 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25786 nregs = pcum->aapcs_ncrn;
25787 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25788 nregs++;
25790 else
25791 nregs = pcum->nregs;
25793 if (nregs < NUM_ARG_REGS)
25794 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25797 /* We can't rely on the caller doing the proper promotion when
25798 using APCS or ATPCS. */
25800 static bool
25801 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25803 return !TARGET_AAPCS_BASED;
25806 static enum machine_mode
25807 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25808 enum machine_mode mode,
25809 int *punsignedp ATTRIBUTE_UNUSED,
25810 const_tree fntype ATTRIBUTE_UNUSED,
25811 int for_return ATTRIBUTE_UNUSED)
25813 if (GET_MODE_CLASS (mode) == MODE_INT
25814 && GET_MODE_SIZE (mode) < 4)
25815 return SImode;
25817 return mode;
25820 /* AAPCS based ABIs use short enums by default. */
25822 static bool
25823 arm_default_short_enums (void)
25825 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25829 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25831 static bool
25832 arm_align_anon_bitfield (void)
25834 return TARGET_AAPCS_BASED;
25838 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25840 static tree
25841 arm_cxx_guard_type (void)
25843 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25847 /* The EABI says test the least significant bit of a guard variable. */
25849 static bool
25850 arm_cxx_guard_mask_bit (void)
25852 return TARGET_AAPCS_BASED;
25856 /* The EABI specifies that all array cookies are 8 bytes long. */
25858 static tree
25859 arm_get_cookie_size (tree type)
25861 tree size;
25863 if (!TARGET_AAPCS_BASED)
25864 return default_cxx_get_cookie_size (type);
25866 size = build_int_cst (sizetype, 8);
25867 return size;
25871 /* The EABI says that array cookies should also contain the element size. */
25873 static bool
25874 arm_cookie_has_size (void)
25876 return TARGET_AAPCS_BASED;
25880 /* The EABI says constructors and destructors should return a pointer to
25881 the object constructed/destroyed. */
25883 static bool
25884 arm_cxx_cdtor_returns_this (void)
25886 return TARGET_AAPCS_BASED;
25889 /* The EABI says that an inline function may never be the key
25890 method. */
25892 static bool
25893 arm_cxx_key_method_may_be_inline (void)
25895 return !TARGET_AAPCS_BASED;
25898 static void
25899 arm_cxx_determine_class_data_visibility (tree decl)
25901 if (!TARGET_AAPCS_BASED
25902 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25903 return;
25905 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25906 is exported. However, on systems without dynamic vague linkage,
25907 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25908 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
25909 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
25910 else
25911 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
25912 DECL_VISIBILITY_SPECIFIED (decl) = 1;
25915 static bool
25916 arm_cxx_class_data_always_comdat (void)
25918 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25919 vague linkage if the class has no key function. */
25920 return !TARGET_AAPCS_BASED;
25924 /* The EABI says __aeabi_atexit should be used to register static
25925 destructors. */
25927 static bool
25928 arm_cxx_use_aeabi_atexit (void)
25930 return TARGET_AAPCS_BASED;
25934 void
25935 arm_set_return_address (rtx source, rtx scratch)
25937 arm_stack_offsets *offsets;
25938 HOST_WIDE_INT delta;
25939 rtx addr;
25940 unsigned long saved_regs;
25942 offsets = arm_get_frame_offsets ();
25943 saved_regs = offsets->saved_regs_mask;
25945 if ((saved_regs & (1 << LR_REGNUM)) == 0)
25946 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25947 else
25949 if (frame_pointer_needed)
25950 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25951 else
25953 /* LR will be the first saved register. */
25954 delta = offsets->outgoing_args - (offsets->frame + 4);
25957 if (delta >= 4096)
25959 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25960 GEN_INT (delta & ~4095)));
25961 addr = scratch;
25962 delta &= 4095;
25964 else
25965 addr = stack_pointer_rtx;
25967 addr = plus_constant (Pmode, addr, delta);
25969 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25974 void
25975 thumb_set_return_address (rtx source, rtx scratch)
25977 arm_stack_offsets *offsets;
25978 HOST_WIDE_INT delta;
25979 HOST_WIDE_INT limit;
25980 int reg;
25981 rtx addr;
25982 unsigned long mask;
25984 emit_use (source);
25986 offsets = arm_get_frame_offsets ();
25987 mask = offsets->saved_regs_mask;
25988 if (mask & (1 << LR_REGNUM))
25990 limit = 1024;
25991 /* Find the saved regs. */
25992 if (frame_pointer_needed)
25994 delta = offsets->soft_frame - offsets->saved_args;
25995 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25996 if (TARGET_THUMB1)
25997 limit = 128;
25999 else
26001 delta = offsets->outgoing_args - offsets->saved_args;
26002 reg = SP_REGNUM;
26004 /* Allow for the stack frame. */
26005 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26006 delta -= 16;
26007 /* The link register is always the first saved register. */
26008 delta -= 4;
26010 /* Construct the address. */
26011 addr = gen_rtx_REG (SImode, reg);
26012 if (delta > limit)
26014 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26015 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26016 addr = scratch;
26018 else
26019 addr = plus_constant (Pmode, addr, delta);
26021 emit_move_insn (gen_frame_mem (Pmode, addr), source);
26023 else
26024 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26027 /* Implements target hook vector_mode_supported_p. */
26028 bool
26029 arm_vector_mode_supported_p (enum machine_mode mode)
26031 /* Neon also supports V2SImode, etc. listed in the clause below. */
26032 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26033 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26034 return true;
26036 if ((TARGET_NEON || TARGET_IWMMXT)
26037 && ((mode == V2SImode)
26038 || (mode == V4HImode)
26039 || (mode == V8QImode)))
26040 return true;
26042 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26043 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26044 || mode == V2HAmode))
26045 return true;
26047 return false;
26050 /* Implements target hook array_mode_supported_p. */
26052 static bool
26053 arm_array_mode_supported_p (enum machine_mode mode,
26054 unsigned HOST_WIDE_INT nelems)
26056 if (TARGET_NEON
26057 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26058 && (nelems >= 2 && nelems <= 4))
26059 return true;
26061 return false;
26064 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26065 registers when autovectorizing for Neon, at least until multiple vector
26066 widths are supported properly by the middle-end. */
26068 static enum machine_mode
26069 arm_preferred_simd_mode (enum machine_mode mode)
26071 if (TARGET_NEON)
26072 switch (mode)
26074 case SFmode:
26075 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26076 case SImode:
26077 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26078 case HImode:
26079 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26080 case QImode:
26081 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26082 case DImode:
26083 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26084 return V2DImode;
26085 break;
26087 default:;
26090 if (TARGET_REALLY_IWMMXT)
26091 switch (mode)
26093 case SImode:
26094 return V2SImode;
26095 case HImode:
26096 return V4HImode;
26097 case QImode:
26098 return V8QImode;
26100 default:;
26103 return word_mode;
26106 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26108 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26109 using r0-r4 for function arguments, r7 for the stack frame and don't have
26110 enough left over to do doubleword arithmetic. For Thumb-2 all the
26111 potentially problematic instructions accept high registers so this is not
26112 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26113 that require many low registers. */
26114 static bool
26115 arm_class_likely_spilled_p (reg_class_t rclass)
26117 if ((TARGET_THUMB1 && rclass == LO_REGS)
26118 || rclass == CC_REG)
26119 return true;
26121 return false;
26124 /* Implements target hook small_register_classes_for_mode_p. */
26125 bool
26126 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
26128 return TARGET_THUMB1;
26131 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26132 ARM insns and therefore guarantee that the shift count is modulo 256.
26133 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26134 guarantee no particular behavior for out-of-range counts. */
26136 static unsigned HOST_WIDE_INT
26137 arm_shift_truncation_mask (enum machine_mode mode)
26139 return mode == SImode ? 255 : 0;
26143 /* Map internal gcc register numbers to DWARF2 register numbers. */
26145 unsigned int
26146 arm_dbx_register_number (unsigned int regno)
26148 if (regno < 16)
26149 return regno;
26151 if (IS_VFP_REGNUM (regno))
26153 /* See comment in arm_dwarf_register_span. */
26154 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26155 return 64 + regno - FIRST_VFP_REGNUM;
26156 else
26157 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26160 if (IS_IWMMXT_GR_REGNUM (regno))
26161 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26163 if (IS_IWMMXT_REGNUM (regno))
26164 return 112 + regno - FIRST_IWMMXT_REGNUM;
26166 gcc_unreachable ();
26169 /* DWARF models VFPv3 registers as 32 64-bit registers.
26170 GCC models them as 64 32-bit registers, so we need to describe this to
26171 the DWARF generation code. Other registers can use the default. */
26172 static rtx
26173 arm_dwarf_register_span (rtx rtl)
26175 unsigned regno;
26176 int nregs;
26177 int i;
26178 rtx p;
26180 regno = REGNO (rtl);
26181 if (!IS_VFP_REGNUM (regno))
26182 return NULL_RTX;
26184 /* XXX FIXME: The EABI defines two VFP register ranges:
26185 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26186 256-287: D0-D31
26187 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26188 corresponding D register. Until GDB supports this, we shall use the
26189 legacy encodings. We also use these encodings for D0-D15 for
26190 compatibility with older debuggers. */
26191 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26192 return NULL_RTX;
26194 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
26195 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
26196 for (i = 0; i < nregs; i++)
26197 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
26199 return p;
26202 #if ARM_UNWIND_INFO
26203 /* Emit unwind directives for a store-multiple instruction or stack pointer
26204 push during alignment.
26205 These should only ever be generated by the function prologue code, so
26206 expect them to have a particular form. */
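/* For example, a prologue "push {r4, r5, lr}" parallel produces

	.save {r4, r5, lr}

   a VFP store-multiple of d8-d9 produces ".vsave {d8, d9}", and a push
   that also stores the never-restored pc gets an extra ".pad #4".  */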
26208 static void
26209 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26211 int i;
26212 HOST_WIDE_INT offset;
26213 HOST_WIDE_INT nregs;
26214 int reg_size;
26215 unsigned reg;
26216 unsigned lastreg;
26217 rtx e;
26219 e = XVECEXP (p, 0, 0);
26220 if (GET_CODE (e) != SET)
26221 abort ();
26223 /* First insn will adjust the stack pointer. */
26224 if (GET_CODE (e) != SET
26225 || !REG_P (XEXP (e, 0))
26226 || REGNO (XEXP (e, 0)) != SP_REGNUM
26227 || GET_CODE (XEXP (e, 1)) != PLUS)
26228 abort ();
26230 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
26231 nregs = XVECLEN (p, 0) - 1;
26233 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
26234 if (reg < 16)
26236 /* The function prologue may also push pc, but does not annotate it, as it is
26237 never restored. We turn this into a stack pointer adjustment. */
26238 if (nregs * 4 == offset - 4)
26240 fprintf (asm_out_file, "\t.pad #4\n");
26241 offset -= 4;
26243 reg_size = 4;
26244 fprintf (asm_out_file, "\t.save {");
26246 else if (IS_VFP_REGNUM (reg))
26248 reg_size = 8;
26249 fprintf (asm_out_file, "\t.vsave {");
26251 else
26252 /* Unknown register type. */
26253 abort ();
26255 /* If the stack increment doesn't match the size of the saved registers,
26256 something has gone horribly wrong. */
26257 if (offset != nregs * reg_size)
26258 abort ();
26260 offset = 0;
26261 lastreg = 0;
26262 /* The remaining insns will describe the stores. */
26263 for (i = 1; i <= nregs; i++)
26265 /* Expect (set (mem <addr>) (reg)).
26266 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26267 e = XVECEXP (p, 0, i);
26268 if (GET_CODE (e) != SET
26269 || !MEM_P (XEXP (e, 0))
26270 || !REG_P (XEXP (e, 1)))
26271 abort ();
26273 reg = REGNO (XEXP (e, 1));
26274 if (reg < lastreg)
26275 abort ();
26277 if (i != 1)
26278 fprintf (asm_out_file, ", ");
26279 /* We can't use %r for vfp because we need to use the
26280 double precision register names. */
26281 if (IS_VFP_REGNUM (reg))
26282 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26283 else
26284 asm_fprintf (asm_out_file, "%r", reg);
26286 #ifdef ENABLE_CHECKING
26287 /* Check that the addresses are consecutive. */
26288 e = XEXP (XEXP (e, 0), 0);
26289 if (GET_CODE (e) == PLUS)
26291 offset += reg_size;
26292 if (!REG_P (XEXP (e, 0))
26293 || REGNO (XEXP (e, 0)) != SP_REGNUM
26294 || !CONST_INT_P (XEXP (e, 1))
26295 || offset != INTVAL (XEXP (e, 1)))
26296 abort ();
26298 else if (i != 1
26299 || !REG_P (e)
26300 || REGNO (e) != SP_REGNUM)
26301 abort ();
26302 #endif
26304 fprintf (asm_out_file, "}\n");
26307 /* Emit unwind directives for a SET. */
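/* For example, a single-register push such as (set (mem (pre_dec sp)) (reg r4))
   produces ".save {r4}", a stack decrement produces ".pad #<n>", setting the
   frame pointer from sp produces ".setfp ...", and copying sp into another
   register produces ".movsp ...".  */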
26309 static void
26310 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26312 rtx e0;
26313 rtx e1;
26314 unsigned reg;
26316 e0 = XEXP (p, 0);
26317 e1 = XEXP (p, 1);
26318 switch (GET_CODE (e0))
26320 case MEM:
26321 /* Pushing a single register. */
26322 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26323 || !REG_P (XEXP (XEXP (e0, 0), 0))
26324 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26325 abort ();
26327 asm_fprintf (asm_out_file, "\t.save ");
26328 if (IS_VFP_REGNUM (REGNO (e1)))
26329 asm_fprintf(asm_out_file, "{d%d}\n",
26330 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26331 else
26332 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26333 break;
26335 case REG:
26336 if (REGNO (e0) == SP_REGNUM)
26338 /* A stack increment. */
26339 if (GET_CODE (e1) != PLUS
26340 || !REG_P (XEXP (e1, 0))
26341 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26342 || !CONST_INT_P (XEXP (e1, 1)))
26343 abort ();
26345 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26346 -INTVAL (XEXP (e1, 1)));
26348 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26350 HOST_WIDE_INT offset;
26352 if (GET_CODE (e1) == PLUS)
26354 if (!REG_P (XEXP (e1, 0))
26355 || !CONST_INT_P (XEXP (e1, 1)))
26356 abort ();
26357 reg = REGNO (XEXP (e1, 0));
26358 offset = INTVAL (XEXP (e1, 1));
26359 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26360 HARD_FRAME_POINTER_REGNUM, reg,
26361 offset);
26363 else if (REG_P (e1))
26365 reg = REGNO (e1);
26366 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26367 HARD_FRAME_POINTER_REGNUM, reg);
26369 else
26370 abort ();
26372 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26374 /* Move from sp to reg. */
26375 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26377 else if (GET_CODE (e1) == PLUS
26378 && REG_P (XEXP (e1, 0))
26379 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26380 && CONST_INT_P (XEXP (e1, 1)))
26382 /* Set reg to offset from sp. */
26383 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26384 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26386 else
26387 abort ();
26388 break;
26390 default:
26391 abort ();
26396 /* Emit unwind directives for the given insn. */
26398 static void
26399 arm_unwind_emit (FILE * asm_out_file, rtx insn)
26401 rtx note, pat;
26402 bool handled_one = false;
26404 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26405 return;
26407 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26408 && (TREE_NOTHROW (current_function_decl)
26409 || crtl->all_throwers_are_sibcalls))
26410 return;
26412 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26413 return;
26415 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26417 pat = XEXP (note, 0);
26418 switch (REG_NOTE_KIND (note))
26420 case REG_FRAME_RELATED_EXPR:
26421 goto found;
26423 case REG_CFA_REGISTER:
26424 if (pat == NULL)
26426 pat = PATTERN (insn);
26427 if (GET_CODE (pat) == PARALLEL)
26428 pat = XVECEXP (pat, 0, 0);
26431 /* Only emitted for IS_STACKALIGN re-alignment. */
26433 rtx dest, src;
26434 unsigned reg;
26436 src = SET_SRC (pat);
26437 dest = SET_DEST (pat);
26439 gcc_assert (src == stack_pointer_rtx);
26440 reg = REGNO (dest);
26441 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26442 reg + 0x90, reg);
26444 handled_one = true;
26445 break;
26447 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26448 to get correct DWARF information for shrink-wrapping. We should not
26449 emit unwind information for it because such insns are used either for
26450 pretend arguments or as notes to adjust sp and restore registers from
26451 the stack. */
26452 case REG_CFA_DEF_CFA:
26453 case REG_CFA_ADJUST_CFA:
26454 case REG_CFA_RESTORE:
26455 return;
26457 case REG_CFA_EXPRESSION:
26458 case REG_CFA_OFFSET:
26459 /* ??? Only handling here what we actually emit. */
26460 gcc_unreachable ();
26462 default:
26463 break;
26466 if (handled_one)
26467 return;
26468 pat = PATTERN (insn);
26469 found:
26471 switch (GET_CODE (pat))
26473 case SET:
26474 arm_unwind_emit_set (asm_out_file, pat);
26475 break;
26477 case SEQUENCE:
26478 /* Store multiple. */
26479 arm_unwind_emit_sequence (asm_out_file, pat);
26480 break;
26482 default:
26483 abort();
26488 /* Output a reference from a function exception table to the type_info
26489 object X. The EABI specifies that the symbol should be relocated by
26490 an R_ARM_TARGET2 relocation. */
26492 static bool
26493 arm_output_ttype (rtx x)
26495 fputs ("\t.word\t", asm_out_file);
26496 output_addr_const (asm_out_file, x);
26497 /* Use special relocations for symbol references. */
26498 if (!CONST_INT_P (x))
26499 fputs ("(TARGET2)", asm_out_file);
26500 fputc ('\n', asm_out_file);
26502 return TRUE;
26505 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26507 static void
26508 arm_asm_emit_except_personality (rtx personality)
26510 fputs ("\t.personality\t", asm_out_file);
26511 output_addr_const (asm_out_file, personality);
26512 fputc ('\n', asm_out_file);
26515 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26517 static void
26518 arm_asm_init_sections (void)
26520 exception_section = get_unnamed_section (0, output_section_asm_op,
26521 "\t.handlerdata");
26523 #endif /* ARM_UNWIND_INFO */
26525 /* Output unwind directives for the start/end of a function. */
26527 void
26528 arm_output_fn_unwind (FILE * f, bool prologue)
26530 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26531 return;
26533 if (prologue)
26534 fputs ("\t.fnstart\n", f);
26535 else
26537 /* If this function will never be unwound, then mark it as such.
26538 The same condition is used in arm_unwind_emit to suppress
26539 the frame annotations. */
26540 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26541 && (TREE_NOTHROW (current_function_decl)
26542 || crtl->all_throwers_are_sibcalls))
26543 fputs("\t.cantunwind\n", f);
26545 fputs ("\t.fnend\n", f);
26549 static bool
26550 arm_emit_tls_decoration (FILE *fp, rtx x)
26552 enum tls_reloc reloc;
26553 rtx val;
26555 val = XVECEXP (x, 0, 0);
26556 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26558 output_addr_const (fp, val);
26560 switch (reloc)
26562 case TLS_GD32:
26563 fputs ("(tlsgd)", fp);
26564 break;
26565 case TLS_LDM32:
26566 fputs ("(tlsldm)", fp);
26567 break;
26568 case TLS_LDO32:
26569 fputs ("(tlsldo)", fp);
26570 break;
26571 case TLS_IE32:
26572 fputs ("(gottpoff)", fp);
26573 break;
26574 case TLS_LE32:
26575 fputs ("(tpoff)", fp);
26576 break;
26577 case TLS_DESCSEQ:
26578 fputs ("(tlsdesc)", fp);
26579 break;
26580 default:
26581 gcc_unreachable ();
26584 switch (reloc)
26586 case TLS_GD32:
26587 case TLS_LDM32:
26588 case TLS_IE32:
26589 case TLS_DESCSEQ:
26590 fputs (" + (. - ", fp);
26591 output_addr_const (fp, XVECEXP (x, 0, 2));
26592 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
26593 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26594 output_addr_const (fp, XVECEXP (x, 0, 3));
26595 fputc (')', fp);
26596 break;
26597 default:
26598 break;
26601 return TRUE;
26604 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26606 static void
26607 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26609 gcc_assert (size == 4);
26610 fputs ("\t.word\t", file);
26611 output_addr_const (file, x);
26612 fputs ("(tlsldo)", file);
26615 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26617 static bool
26618 arm_output_addr_const_extra (FILE *fp, rtx x)
26620 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26621 return arm_emit_tls_decoration (fp, x);
26622 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26624 char label[256];
26625 int labelno = INTVAL (XVECEXP (x, 0, 0));
26627 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26628 assemble_name_raw (fp, label);
26630 return TRUE;
26632 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26634 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26635 if (GOT_PCREL)
26636 fputs ("+.", fp);
26637 fputs ("-(", fp);
26638 output_addr_const (fp, XVECEXP (x, 0, 0));
26639 fputc (')', fp);
26640 return TRUE;
26642 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26644 output_addr_const (fp, XVECEXP (x, 0, 0));
26645 if (GOT_PCREL)
26646 fputs ("+.", fp);
26647 fputs ("-(", fp);
26648 output_addr_const (fp, XVECEXP (x, 0, 1));
26649 fputc (')', fp);
26650 return TRUE;
26652 else if (GET_CODE (x) == CONST_VECTOR)
26653 return arm_emit_vector_const (fp, x);
26655 return FALSE;
26658 /* Output assembly for a shift instruction.
26659 SET_FLAGS determines how the instruction modifies the condition codes.
26660 0 - Do not set condition codes.
26661 1 - Set condition codes.
26662 2 - Use smallest instruction. */
26663 const char *
26664 arm_output_shift(rtx * operands, int set_flags)
26666 char pattern[100];
26667 static const char flag_chars[3] = {'?', '.', '!'};
26668 const char *shift;
26669 HOST_WIDE_INT val;
26670 char c;
26672 c = flag_chars[set_flags];
26673 if (TARGET_UNIFIED_ASM)
26675 shift = shift_op(operands[3], &val);
26676 if (shift)
26678 if (val != -1)
26679 operands[2] = GEN_INT(val);
26680 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26682 else
26683 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26685 else
26686 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26687 output_asm_insn (pattern, operands);
26688 return "";
26691 /* Output assembly for a WMMX immediate shift instruction. */
26692 const char *
26693 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26695 int shift = INTVAL (operands[2]);
26696 char templ[50];
26697 enum machine_mode opmode = GET_MODE (operands[0]);
26699 gcc_assert (shift >= 0);
26701 /* If the shift value is out of range for the element width (> 63 for the D
26702 qualifier, > 31 for W, > 15 for H), the result has to be synthesized specially. */
26703 if (((opmode == V4HImode) && (shift > 15))
26704 || ((opmode == V2SImode) && (shift > 31))
26705 || ((opmode == DImode) && (shift > 63)))
26707 if (wror_or_wsra)
26709 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26710 output_asm_insn (templ, operands);
26711 if (opmode == DImode)
26713 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26714 output_asm_insn (templ, operands);
26717 else
26719 /* The destination register will contain all zeros. */
26720 sprintf (templ, "wzero\t%%0");
26721 output_asm_insn (templ, operands);
26723 return "";
26726 if ((opmode == DImode) && (shift > 32))
26728 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26729 output_asm_insn (templ, operands);
26730 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26731 output_asm_insn (templ, operands);
26733 else
26735 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26736 output_asm_insn (templ, operands);
26738 return "";
26741 /* Output assembly for a WMMX tinsr instruction. */
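/* operands[3] is a one-hot mask selecting the destination lane; the loop
   below recovers the lane index from the single set bit, so e.g. a mask of
   4 with a V4HImode destination emits roughly "tinsrh %0, %2, #2".  */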
26742 const char *
26743 arm_output_iwmmxt_tinsr (rtx *operands)
26745 int mask = INTVAL (operands[3]);
26746 int i;
26747 char templ[50];
26748 int units = mode_nunits[GET_MODE (operands[0])];
26749 gcc_assert ((mask & (mask - 1)) == 0);
26750 for (i = 0; i < units; ++i)
26752 if ((mask & 0x01) == 1)
26754 break;
26756 mask >>= 1;
26758 gcc_assert (i < units);
26760 switch (GET_MODE (operands[0]))
26762 case V8QImode:
26763 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26764 break;
26765 case V4HImode:
26766 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26767 break;
26768 case V2SImode:
26769 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26770 break;
26771 default:
26772 gcc_unreachable ();
26773 break;
26775 output_asm_insn (templ, operands);
26777 return "";
26780 /* Output a Thumb-1 casesi dispatch sequence. */
26781 const char *
26782 thumb1_output_casesi (rtx *operands)
26784 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
26786 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26788 switch (GET_MODE(diff_vec))
26790 case QImode:
26791 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26792 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26793 case HImode:
26794 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26795 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26796 case SImode:
26797 return "bl\t%___gnu_thumb1_case_si";
26798 default:
26799 gcc_unreachable ();
26803 /* Output a Thumb-2 casesi instruction. */
26804 const char *
26805 thumb2_output_casesi (rtx *operands)
26807 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
26809 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26811 output_asm_insn ("cmp\t%0, %1", operands);
26812 output_asm_insn ("bhi\t%l3", operands);
26813 switch (GET_MODE(diff_vec))
26815 case QImode:
26816 return "tbb\t[%|pc, %0]";
26817 case HImode:
26818 return "tbh\t[%|pc, %0, lsl #1]";
26819 case SImode:
26820 if (flag_pic)
26822 output_asm_insn ("adr\t%4, %l2", operands);
26823 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26824 output_asm_insn ("add\t%4, %4, %5", operands);
26825 return "bx\t%4";
26827 else
26829 output_asm_insn ("adr\t%4, %l2", operands);
26830 return "ldr\t%|pc, [%4, %0, lsl #2]";
26832 default:
26833 gcc_unreachable ();
26837 /* Most ARM cores are single issue, but some newer ones can dual issue.
26838 The scheduler descriptions rely on this being correct. */
26839 static int
26840 arm_issue_rate (void)
26842 switch (arm_tune)
26844 case cortexa15:
26845 return 3;
26847 case cortexr4:
26848 case cortexr4f:
26849 case cortexr5:
26850 case genericv7a:
26851 case cortexa5:
26852 case cortexa7:
26853 case cortexa8:
26854 case cortexa9:
26855 case cortexa53:
26856 case fa726te:
26857 case marvell_pj4:
26858 return 2;
26860 default:
26861 return 1;
26865 /* A table and a function to perform ARM-specific name mangling for
26866 NEON vector types in order to conform to the AAPCS (see "Procedure
26867 Call Standard for the ARM Architecture", Appendix A). To qualify
26868 for emission with the mangled names defined in that document, a
26869 vector type must not only be of the correct mode but also be
26870 composed of NEON vector element types (e.g. __builtin_neon_qi). */
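/* For example, by the table below a 64-bit vector of __builtin_neon_qi
   elements (V8QImode, i.e. int8x8_t) is mangled as "15__simd64_int8_t", so a
   C++ function f taking one such argument would mangle roughly as
   _Z1f15__simd64_int8_t.  */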
26871 typedef struct
26873 enum machine_mode mode;
26874 const char *element_type_name;
26875 const char *aapcs_name;
26876 } arm_mangle_map_entry;
26878 static arm_mangle_map_entry arm_mangle_map[] = {
26879 /* 64-bit containerized types. */
26880 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
26881 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
26882 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
26883 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
26884 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
26885 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
26886 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
26887 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
26888 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
26889 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
26891 /* 128-bit containerized types. */
26892 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
26893 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
26894 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
26895 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
26896 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
26897 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
26898 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
26899 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
26900 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
26901 { VOIDmode, NULL, NULL }
26904 const char *
26905 arm_mangle_type (const_tree type)
26907 arm_mangle_map_entry *pos = arm_mangle_map;
26909 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26910 has to be mangled as if it were in the "std" namespace. */
26911 if (TARGET_AAPCS_BASED
26912 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
26913 return "St9__va_list";
26915 /* Half-precision float. */
26916 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
26917 return "Dh";
26919 if (TREE_CODE (type) != VECTOR_TYPE)
26920 return NULL;
26922 /* Check the mode of the vector type, and the name of the vector
26923 element type, against the table. */
26924 while (pos->mode != VOIDmode)
26926 tree elt_type = TREE_TYPE (type);
26928 if (pos->mode == TYPE_MODE (type)
26929 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
26930 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
26931 pos->element_type_name))
26932 return pos->aapcs_name;
26934 pos++;
26937 /* Use the default mangling for unrecognized (possibly user-defined)
26938 vector types. */
26939 return NULL;
26942 /* Order of allocation of core registers for Thumb: this allocation is
26943 written over the corresponding initial entries of the array
26944 initialized with REG_ALLOC_ORDER. We allocate all low registers
26945 first. Saving and restoring a low register is usually cheaper than
26946 using a call-clobbered high register. */
26948 static const int thumb_core_reg_alloc_order[] =
26950 3, 2, 1, 0, 4, 5, 6, 7,
26951 14, 12, 8, 9, 10, 11
26954 /* Adjust register allocation order when compiling for Thumb. */
26956 void
26957 arm_order_regs_for_local_alloc (void)
26959 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26960 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26961 if (TARGET_THUMB)
26962 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26963 sizeof (thumb_core_reg_alloc_order));
26966 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26968 bool
26969 arm_frame_pointer_required (void)
26971 return (cfun->has_nonlocal_label
26972 || SUBTARGET_FRAME_POINTER_REQUIRED
26973 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
26976 /* Thumb-1 is the only target that cannot do conditional execution, so
26977 return true unless the target is Thumb-1. */
26978 static bool
26979 arm_have_conditional_execution (void)
26981 return !TARGET_THUMB1;
26984 tree
26985 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
26987 enum machine_mode in_mode, out_mode;
26988 int in_n, out_n;
26990 if (TREE_CODE (type_out) != VECTOR_TYPE
26991 || TREE_CODE (type_in) != VECTOR_TYPE
26992 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
26993 return NULL_TREE;
26995 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26996 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26997 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26998 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27000 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
27001 decl of the vectorized builtin for the appropriate vector mode.
27002 NULL_TREE is returned if no such builtin is available. */
27003 #undef ARM_CHECK_BUILTIN_MODE
27004 #define ARM_CHECK_BUILTIN_MODE(C) \
27005 (out_mode == SFmode && out_n == C \
27006 && in_mode == SFmode && in_n == C)
27008 #undef ARM_FIND_VRINT_VARIANT
27009 #define ARM_FIND_VRINT_VARIANT(N) \
27010 (ARM_CHECK_BUILTIN_MODE (2) \
27011 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
27012 : (ARM_CHECK_BUILTIN_MODE (4) \
27013 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
27014 : NULL_TREE))
27016 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
27018 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27019 switch (fn)
27021 case BUILT_IN_FLOORF:
27022 return ARM_FIND_VRINT_VARIANT (vrintm);
27023 case BUILT_IN_CEILF:
27024 return ARM_FIND_VRINT_VARIANT (vrintp);
27025 case BUILT_IN_TRUNCF:
27026 return ARM_FIND_VRINT_VARIANT (vrintz);
27027 case BUILT_IN_ROUNDF:
27028 return ARM_FIND_VRINT_VARIANT (vrinta);
27029 default:
27030 return NULL_TREE;
27033 return NULL_TREE;
27035 #undef ARM_CHECK_BUILTIN_MODE
27036 #undef ARM_FIND_VRINT_VARIANT
27038 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27039 static HOST_WIDE_INT
27040 arm_vector_alignment (const_tree type)
27042 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
27044 if (TARGET_AAPCS_BASED)
27045 align = MIN (align, 64);
27047 return align;
27050 static unsigned int
27051 arm_autovectorize_vector_sizes (void)
27053 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27056 static bool
27057 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27059 /* Vectors which aren't in packed structures will not be less aligned than
27060 the natural alignment of their element type, so this is safe. */
27061 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
27062 return !is_packed;
27064 return default_builtin_vector_alignment_reachable (type, is_packed);
27067 static bool
27068 arm_builtin_support_vector_misalignment (enum machine_mode mode,
27069 const_tree type, int misalignment,
27070 bool is_packed)
27072 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
27074 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27076 if (is_packed)
27077 return align == 1;
27079 /* If the misalignment is unknown, we should be able to handle the access
27080 so long as it is not to a member of a packed data structure. */
27081 if (misalignment == -1)
27082 return true;
27084 /* Return true if the misalignment is a multiple of the natural alignment
27085 of the vector's element type. This is probably always going to be
27086 true in practice, since we've already established that this isn't a
27087 packed access. */
27088 return ((misalignment % align) == 0);
27091 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27092 is_packed);
27095 static void
27096 arm_conditional_register_usage (void)
27098 int regno;
27100 if (TARGET_THUMB1 && optimize_size)
27102 /* When optimizing for size on Thumb-1, it's better not
27103 to use the HI regs, because of the overhead of
27104 stacking them. */
27105 for (regno = FIRST_HI_REGNUM;
27106 regno <= LAST_HI_REGNUM; ++regno)
27107 fixed_regs[regno] = call_used_regs[regno] = 1;
27110 /* The link register can be clobbered by any branch insn,
27111 but we have no way to track that at present, so mark
27112 it as unavailable. */
27113 if (TARGET_THUMB1)
27114 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27116 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27118 /* VFPv3 registers are disabled when earlier VFP
27119 versions are selected due to the definition of
27120 LAST_VFP_REGNUM. */
27121 for (regno = FIRST_VFP_REGNUM;
27122 regno <= LAST_VFP_REGNUM; ++ regno)
27124 fixed_regs[regno] = 0;
27125 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27126 || regno >= FIRST_VFP_REGNUM + 32;
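/* That is, s0-s15 (d0-d7) and d16-d31 are treated as call-clobbered,
   while s16-s31 (d8-d15) remain call-saved, as the AAPCS requires.  */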
27130 if (TARGET_REALLY_IWMMXT)
27132 regno = FIRST_IWMMXT_GR_REGNUM;
27133 /* The 2002/10/09 revision of the XScale ABI has wCG0
27134 and wCG1 as call-preserved registers. The 2002/11/21
27135 revision changed this so that all wCG registers are
27136 scratch registers. */
27137 for (regno = FIRST_IWMMXT_GR_REGNUM;
27138 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27139 fixed_regs[regno] = 0;
27140 /* The XScale ABI has wR0 - wR9 as scratch registers,
27141 the rest as call-preserved registers. */
27142 for (regno = FIRST_IWMMXT_REGNUM;
27143 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27145 fixed_regs[regno] = 0;
27146 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27150 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27152 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27153 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27155 else if (TARGET_APCS_STACK)
27157 fixed_regs[10] = 1;
27158 call_used_regs[10] = 1;
27160 /* -mcaller-super-interworking reserves r11 for calls to
27161 _interwork_r11_call_via_rN(). Making the register global
27162 is an easy way of ensuring that it remains valid for all
27163 calls. */
27164 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27165 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27167 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27168 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27169 if (TARGET_CALLER_INTERWORKING)
27170 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27172 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27175 static reg_class_t
27176 arm_preferred_rename_class (reg_class_t rclass)
27178 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27179 using GENERAL_REGS. During the register rename pass we therefore prefer
27180 LO_REGS, so that code size can be reduced. */
27181 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27182 return LO_REGS;
27183 else
27184 return NO_REGS;
27187 /* Compute the attribute "length" of insn "*push_multi".
27188 So this function MUST be kept in sync with that insn pattern. */
int
27190 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
27192 int i, regno, hi_reg;
27193 int num_saves = XVECLEN (parallel_op, 0);
27195 /* ARM mode. */
27196 if (TARGET_ARM)
27197 return 4;
27198 /* Thumb1 mode. */
27199 if (TARGET_THUMB1)
27200 return 2;
27202 /* Thumb2 mode. */
27203 regno = REGNO (first_op);
27204 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27205 for (i = 1; i < num_saves && !hi_reg; i++)
27207 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27208 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27211 if (!hi_reg)
27212 return 2;
27213 return 4;
27216 /* Compute the number of instructions emitted by output_move_double. */
int
27218 arm_count_output_move_double_insns (rtx *operands)
27220 int count;
27221 rtx ops[2];
27222 /* output_move_double may modify the operands array, so call it
27223 here on a copy of the array. */
27224 ops[0] = operands[0];
27225 ops[1] = operands[1];
27226 output_move_double (ops, false, &count);
27227 return count;
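/* If OPERAND is a CONST_DOUBLE whose value is an exact power-of-two
   reciprocal (e.g. 0.125 = 1/8), return the number of fraction bits it
   represents (3 for 0.125); otherwise return 0.  */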
int
27231 vfp3_const_double_for_fract_bits (rtx operand)
27233 REAL_VALUE_TYPE r0;
27235 if (!CONST_DOUBLE_P (operand))
27236 return 0;
27238 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27239 if (exact_real_inverse (DFmode, &r0))
27241 if (exact_real_truncate (DFmode, &r0))
27243 HOST_WIDE_INT value = real_to_integer (&r0);
27244 value = value & 0xffffffff;
27245 if ((value != 0) && ( (value & (value - 1)) == 0))
27246 return int_log2 (value);
27249 return 0;
27252 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27254 static void
27255 arm_pre_atomic_barrier (enum memmodel model)
27257 if (need_atomic_barrier_p (model, true))
27258 emit_insn (gen_memory_barrier ());
27261 static void
27262 arm_post_atomic_barrier (enum memmodel model)
27264 if (need_atomic_barrier_p (model, false))
27265 emit_insn (gen_memory_barrier ());
27268 /* Emit the load-exclusive and store-exclusive instructions.
27269 Use acquire and release versions if necessary. */
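/* On targets with TARGET_HAVE_LDACQ the acquire/release versions are the
   LDAEX/STLEX family rather than plain LDREX/STREX.  */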
27271 static void
27272 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
27274 rtx (*gen) (rtx, rtx);
27276 if (acq)
27278 switch (mode)
27280 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27281 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27282 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27283 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27284 default:
27285 gcc_unreachable ();
27288 else
27290 switch (mode)
27292 case QImode: gen = gen_arm_load_exclusiveqi; break;
27293 case HImode: gen = gen_arm_load_exclusivehi; break;
27294 case SImode: gen = gen_arm_load_exclusivesi; break;
27295 case DImode: gen = gen_arm_load_exclusivedi; break;
27296 default:
27297 gcc_unreachable ();
27301 emit_insn (gen (rval, mem));
27304 static void
27305 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
27306 rtx mem, bool rel)
27308 rtx (*gen) (rtx, rtx, rtx);
27310 if (rel)
27312 switch (mode)
27314 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27315 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27316 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27317 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27318 default:
27319 gcc_unreachable ();
27322 else
27324 switch (mode)
27326 case QImode: gen = gen_arm_store_exclusiveqi; break;
27327 case HImode: gen = gen_arm_store_exclusivehi; break;
27328 case SImode: gen = gen_arm_store_exclusivesi; break;
27329 case DImode: gen = gen_arm_store_exclusivedi; break;
27330 default:
27331 gcc_unreachable ();
27335 emit_insn (gen (bval, rval, mem));
27338 /* Mark the previous jump instruction as unlikely. */
27340 static void
27341 emit_unlikely_jump (rtx insn)
27343 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
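/* REG_BR_PROB_BASE is 10000, so this marks the branch as taken roughly
   1% of the time.  */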
27345 insn = emit_jump_insn (insn);
27346 add_reg_note (insn, REG_BR_PROB, very_unlikely);
27349 /* Expand a compare and swap pattern. */
27351 void
27352 arm_expand_compare_and_swap (rtx operands[])
27354 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27355 enum machine_mode mode;
27356 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27358 bval = operands[0];
27359 rval = operands[1];
27360 mem = operands[2];
27361 oldval = operands[3];
27362 newval = operands[4];
27363 is_weak = operands[5];
27364 mod_s = operands[6];
27365 mod_f = operands[7];
27366 mode = GET_MODE (mem);
27368 /* Normally the succ memory model must be stronger than fail, but in the
27369 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27370 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27372 if (TARGET_HAVE_LDACQ
27373 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27374 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27375 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27377 switch (mode)
27379 case QImode:
27380 case HImode:
27381 /* For narrow modes, we're going to perform the comparison in SImode,
27382 so do the zero-extension now. */
27383 rval = gen_reg_rtx (SImode);
27384 oldval = convert_modes (SImode, mode, oldval, true);
27385 /* FALLTHRU */
27387 case SImode:
27388 /* Force the value into a register if needed. We waited until after
27389 the zero-extension above to do this properly. */
27390 if (!arm_add_operand (oldval, SImode))
27391 oldval = force_reg (SImode, oldval);
27392 break;
27394 case DImode:
27395 if (!cmpdi_operand (oldval, mode))
27396 oldval = force_reg (mode, oldval);
27397 break;
27399 default:
27400 gcc_unreachable ();
27403 switch (mode)
27405 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27406 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27407 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27408 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27409 default:
27410 gcc_unreachable ();
27413 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27415 if (mode == QImode || mode == HImode)
27416 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27418 /* In all cases, we arrange for success to be signaled by Z set.
27419 This arrangement allows for the boolean result to be used directly
27420 in a subsequent branch, post optimization. */
27421 x = gen_rtx_REG (CCmode, CC_REGNUM);
27422 x = gen_rtx_EQ (SImode, x, const0_rtx);
27423 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27426 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27427 another memory store between the load-exclusive and store-exclusive can
27428 reset the monitor from Exclusive to Open state. This means we must wait
27429 until after reload to split the pattern, lest we get a register spill in
27430 the middle of the atomic sequence. */
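/* Roughly, for a strong SImode compare-and-swap (barriers and
   acquire/release forms omitted) the split sequence is:

        1:      ldrex   rval, [mem]
                cmp     rval, oldval
                bne     2f
                strex   scratch, newval, [mem]
                cmp     scratch, #0
                bne     1b
        2:

   where the register names and local labels are only placeholders.  */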
27432 void
27433 arm_split_compare_and_swap (rtx operands[])
27435 rtx rval, mem, oldval, newval, scratch;
27436 enum machine_mode mode;
27437 enum memmodel mod_s, mod_f;
27438 bool is_weak;
27439 rtx label1, label2, x, cond;
27441 rval = operands[0];
27442 mem = operands[1];
27443 oldval = operands[2];
27444 newval = operands[3];
27445 is_weak = (operands[4] != const0_rtx);
27446 mod_s = (enum memmodel) INTVAL (operands[5]);
27447 mod_f = (enum memmodel) INTVAL (operands[6]);
27448 scratch = operands[7];
27449 mode = GET_MODE (mem);
27451 bool use_acquire = TARGET_HAVE_LDACQ
27452 && !(mod_s == MEMMODEL_RELAXED
27453 || mod_s == MEMMODEL_CONSUME
27454 || mod_s == MEMMODEL_RELEASE);
27456 bool use_release = TARGET_HAVE_LDACQ
27457 && !(mod_s == MEMMODEL_RELAXED
27458 || mod_s == MEMMODEL_CONSUME
27459 || mod_s == MEMMODEL_ACQUIRE);
27461 /* Checks whether a barrier is needed and emits one accordingly. */
27462 if (!(use_acquire || use_release))
27463 arm_pre_atomic_barrier (mod_s);
27465 label1 = NULL_RTX;
27466 if (!is_weak)
27468 label1 = gen_label_rtx ();
27469 emit_label (label1);
27471 label2 = gen_label_rtx ();
27473 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27475 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27476 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27477 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27478 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27479 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27481 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27483 /* Weak or strong, we want EQ to be true for success, so that we
27484 match the flags that we got from the compare above. */
27485 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27486 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27487 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27489 if (!is_weak)
27491 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27492 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27493 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27494 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27497 if (mod_f != MEMMODEL_RELAXED)
27498 emit_label (label2);
27500 /* Checks whether a barrier is needed and emits one accordingly. */
27501 if (!(use_acquire || use_release))
27502 arm_post_atomic_barrier (mod_s);
27504 if (mod_f == MEMMODEL_RELAXED)
27505 emit_label (label2);
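/* Split an atomic read-modify-write operation: emit a load-exclusive /
   operate / store-exclusive retry loop that applies CODE with VALUE to MEM,
   making the old and new values available in OLD_OUT and NEW_OUT when those
   are non-null, with barriers or acquire/release accesses as MODEL_RTX
   requires.  COND receives the store-exclusive result used to decide whether
   to retry.  */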
27508 void
27509 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27510 rtx value, rtx model_rtx, rtx cond)
27512 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27513 enum machine_mode mode = GET_MODE (mem);
27514 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
27515 rtx label, x;
27517 bool use_acquire = TARGET_HAVE_LDACQ
27518 && !(model == MEMMODEL_RELAXED
27519 || model == MEMMODEL_CONSUME
27520 || model == MEMMODEL_RELEASE);
27522 bool use_release = TARGET_HAVE_LDACQ
27523 && !(model == MEMMODEL_RELAXED
27524 || model == MEMMODEL_CONSUME
27525 || model == MEMMODEL_ACQUIRE);
27527 /* Checks whether a barrier is needed and emits one accordingly. */
27528 if (!(use_acquire || use_release))
27529 arm_pre_atomic_barrier (model);
27531 label = gen_label_rtx ();
27532 emit_label (label);
27534 if (new_out)
27535 new_out = gen_lowpart (wmode, new_out);
27536 if (old_out)
27537 old_out = gen_lowpart (wmode, old_out);
27538 else
27539 old_out = new_out;
27540 value = simplify_gen_subreg (wmode, value, mode, 0);
27542 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27544 switch (code)
27546 case SET:
27547 new_out = value;
27548 break;
27550 case NOT:
27551 x = gen_rtx_AND (wmode, old_out, value);
27552 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27553 x = gen_rtx_NOT (wmode, new_out);
27554 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27555 break;
27557 case MINUS:
27558 if (CONST_INT_P (value))
27560 value = GEN_INT (-INTVAL (value));
27561 code = PLUS;
27563 /* FALLTHRU */
27565 case PLUS:
27566 if (mode == DImode)
27568 /* DImode plus/minus need to clobber flags. */
27569 /* The adddi3 and subdi3 patterns are incorrectly written so that
27570 they require matching operands, even when we could easily support
27571 three operands. Thankfully, this can be fixed up post-splitting,
27572 as the individual add+adc patterns do accept three operands and
27573 post-reload cprop can make these moves go away. */
27574 emit_move_insn (new_out, old_out);
27575 if (code == PLUS)
27576 x = gen_adddi3 (new_out, new_out, value);
27577 else
27578 x = gen_subdi3 (new_out, new_out, value);
27579 emit_insn (x);
27580 break;
27582 /* FALLTHRU */
27584 default:
27585 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27586 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27587 break;
27590 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27591 use_release);
27593 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27594 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27596 /* Checks whether a barrier is needed and emits one accordingly. */
27597 if (!(use_acquire || use_release))
27598 arm_post_atomic_barrier (model);
27601 #define MAX_VECT_LEN 16
27603 struct expand_vec_perm_d
27605 rtx target, op0, op1;
27606 unsigned char perm[MAX_VECT_LEN];
27607 enum machine_mode vmode;
27608 unsigned char nelt;
27609 bool one_vector_p;
27610 bool testing_p;
27613 /* Generate a variable permutation. */
27615 static void
27616 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27618 enum machine_mode vmode = GET_MODE (target);
27619 bool one_vector_p = rtx_equal_p (op0, op1);
27621 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27622 gcc_checking_assert (GET_MODE (op0) == vmode);
27623 gcc_checking_assert (GET_MODE (op1) == vmode);
27624 gcc_checking_assert (GET_MODE (sel) == vmode);
27625 gcc_checking_assert (TARGET_NEON);
27627 if (one_vector_p)
27629 if (vmode == V8QImode)
27630 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27631 else
27632 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27634 else
27636 rtx pair;
27638 if (vmode == V8QImode)
27640 pair = gen_reg_rtx (V16QImode);
27641 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27642 pair = gen_lowpart (TImode, pair);
27643 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27645 else
27647 pair = gen_reg_rtx (OImode);
27648 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27649 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27654 void
27655 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27657 enum machine_mode vmode = GET_MODE (target);
27658 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27659 bool one_vector_p = rtx_equal_p (op0, op1);
27660 rtx rmask[MAX_VECT_LEN], mask;
27662 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27663 numbering of elements for big-endian, we must reverse the order. */
27664 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27666 /* The VTBL instruction does not use a modulo index, so we must take care
27667 of that ourselves. */
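/* For example, with a single V8QImode input each selector byte is ANDed
   with 7 below, so out-of-range indices wrap as VEC_PERM_EXPR requires
   instead of yielding zero as VTBL otherwise would.  */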
27668 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27669 for (i = 0; i < nelt; ++i)
27670 rmask[i] = mask;
27671 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27672 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27674 arm_expand_vec_perm_1 (target, op0, op1, sel);
27677 /* Generate or test for an insn that supports a constant permutation. */
27679 /* Recognize patterns for the VUZP insns. */
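/* For example, with two V8QImode inputs a selector of { 0, 2, 4, ..., 14 }
   (the even elements) or { 1, 3, 5, ..., 15 } (the odd elements) is matched
   here and emitted as a VUZP.  */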
27681 static bool
27682 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27684 unsigned int i, odd, mask, nelt = d->nelt;
27685 rtx out0, out1, in0, in1, x;
27686 rtx (*gen)(rtx, rtx, rtx, rtx);
27688 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27689 return false;
27691 /* Note that these are little-endian tests. Adjust for big-endian later. */
27692 if (d->perm[0] == 0)
27693 odd = 0;
27694 else if (d->perm[0] == 1)
27695 odd = 1;
27696 else
27697 return false;
27698 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27700 for (i = 0; i < nelt; i++)
27702 unsigned elt = (i * 2 + odd) & mask;
27703 if (d->perm[i] != elt)
27704 return false;
27707 /* Success! */
27708 if (d->testing_p)
27709 return true;
27711 switch (d->vmode)
27713 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27714 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27715 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27716 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27717 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27718 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27719 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27720 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27721 default:
27722 gcc_unreachable ();
27725 in0 = d->op0;
27726 in1 = d->op1;
27727 if (BYTES_BIG_ENDIAN)
27729 x = in0, in0 = in1, in1 = x;
27730 odd = !odd;
27733 out0 = d->target;
27734 out1 = gen_reg_rtx (d->vmode);
27735 if (odd)
27736 x = out0, out0 = out1, out1 = x;
27738 emit_insn (gen (out0, in0, in1, out1));
27739 return true;
27742 /* Recognize patterns for the VZIP insns. */
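/* For example, with two V8QImode inputs a selector of
   { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving the low halves, is matched here
   and emitted as a VZIP.  */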
27744 static bool
27745 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27747 unsigned int i, high, mask, nelt = d->nelt;
27748 rtx out0, out1, in0, in1, x;
27749 rtx (*gen)(rtx, rtx, rtx, rtx);
27751 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27752 return false;
27754 /* Note that these are little-endian tests. Adjust for big-endian later. */
27755 high = nelt / 2;
27756 if (d->perm[0] == high)
27758 else if (d->perm[0] == 0)
27759 high = 0;
27760 else
27761 return false;
27762 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27764 for (i = 0; i < nelt / 2; i++)
27766 unsigned elt = (i + high) & mask;
27767 if (d->perm[i * 2] != elt)
27768 return false;
27769 elt = (elt + nelt) & mask;
27770 if (d->perm[i * 2 + 1] != elt)
27771 return false;
27774 /* Success! */
27775 if (d->testing_p)
27776 return true;
27778 switch (d->vmode)
27780 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27781 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27782 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27783 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27784 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27785 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27786 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27787 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27788 default:
27789 gcc_unreachable ();
27792 in0 = d->op0;
27793 in1 = d->op1;
27794 if (BYTES_BIG_ENDIAN)
27796 x = in0, in0 = in1, in1 = x;
27797 high = !high;
27800 out0 = d->target;
27801 out1 = gen_reg_rtx (d->vmode);
27802 if (high)
27803 x = out0, out0 = out1, out1 = x;
27805 emit_insn (gen (out0, in0, in1, out1));
27806 return true;
27809 /* Recognize patterns for the VREV insns. */
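/* For example, on a single V8QImode input a selector of
   { 1, 0, 3, 2, 5, 4, 7, 6 } (diff == 1, adjacent bytes swapped) is matched
   here and emitted as a VREV16.  */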
27811 static bool
27812 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27814 unsigned int i, j, diff, nelt = d->nelt;
27815 rtx (*gen)(rtx, rtx, rtx);
27817 if (!d->one_vector_p)
27818 return false;
27820 diff = d->perm[0];
27821 switch (diff)
27823 case 7:
27824 switch (d->vmode)
27826 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27827 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27828 default:
27829 return false;
27831 break;
27832 case 3:
27833 switch (d->vmode)
27835 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27836 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27837 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27838 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27839 default:
27840 return false;
27842 break;
27843 case 1:
27844 switch (d->vmode)
27846 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27847 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27848 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27849 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27850 case V4SImode: gen = gen_neon_vrev64v4si; break;
27851 case V2SImode: gen = gen_neon_vrev64v2si; break;
27852 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27853 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27854 default:
27855 return false;
27857 break;
27858 default:
27859 return false;
27862 for (i = 0; i < nelt ; i += diff + 1)
27863 for (j = 0; j <= diff; j += 1)
27865 /* This is guaranteed to be true as the value of diff
27866 is 7, 3 or 1 and we should have enough elements in the
27867 queue to generate this. Getting a vector mask with any
27868 other value of diff implies that something is wrong by
27869 the time we get here. */
27870 gcc_assert (i + j < nelt);
27871 if (d->perm[i + j] != i + diff - j)
27872 return false;
27875 /* Success! */
27876 if (d->testing_p)
27877 return true;
27879 /* ??? The third operand is an artifact of the builtin infrastructure
27880 and is ignored by the actual instruction. */
27881 emit_insn (gen (d->target, d->op0, const0_rtx));
27882 return true;
27885 /* Recognize patterns for the VTRN insns. */
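/* For example, with two V8QImode inputs a selector of
   { 0, 8, 2, 10, 4, 12, 6, 14 } is matched here and emitted as a VTRN.  */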
27887 static bool
27888 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27890 unsigned int i, odd, mask, nelt = d->nelt;
27891 rtx out0, out1, in0, in1, x;
27892 rtx (*gen)(rtx, rtx, rtx, rtx);
27894 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27895 return false;
27897 /* Note that these are little-endian tests. Adjust for big-endian later. */
27898 if (d->perm[0] == 0)
27899 odd = 0;
27900 else if (d->perm[0] == 1)
27901 odd = 1;
27902 else
27903 return false;
27904 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27906 for (i = 0; i < nelt; i += 2)
27908 if (d->perm[i] != i + odd)
27909 return false;
27910 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27911 return false;
27914 /* Success! */
27915 if (d->testing_p)
27916 return true;
27918 switch (d->vmode)
27920 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27921 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27922 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27923 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27924 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27925 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27926 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27927 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27928 default:
27929 gcc_unreachable ();
27932 in0 = d->op0;
27933 in1 = d->op1;
27934 if (BYTES_BIG_ENDIAN)
27936 x = in0, in0 = in1, in1 = x;
27937 odd = !odd;
27940 out0 = d->target;
27941 out1 = gen_reg_rtx (d->vmode);
27942 if (odd)
27943 x = out0, out0 = out1, out1 = x;
27945 emit_insn (gen (out0, in0, in1, out1));
27946 return true;
27949 /* Recognize patterns for the VEXT insns. */
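/* For example, with two V8QImode inputs a selector of consecutive indices
   such as { 3, 4, 5, 6, 7, 8, 9, 10 } is matched here and emitted as a VEXT
   with #3.  */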
27951 static bool
27952 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27954 unsigned int i, nelt = d->nelt;
27955 rtx (*gen) (rtx, rtx, rtx, rtx);
27956 rtx offset;
27958 unsigned int location;
27960 unsigned int next = d->perm[0] + 1;
27962 /* TODO: Handle GCC's numbering of elements for big-endian. */
27963 if (BYTES_BIG_ENDIAN)
27964 return false;
27966 /* Check if the extracted indexes are increasing by one. */
27967 for (i = 1; i < nelt; next++, i++)
27969 /* If we hit the most significant element of the 2nd vector in
27970 the previous iteration, no need to test further. */
27971 if (next == 2 * nelt)
27972 return false;
27974 /* If we are operating on only one vector: it could be a
27975 rotation. If there are only two elements of size < 64, let
27976 arm_evpc_neon_vrev catch it. */
27977 if (d->one_vector_p && (next == nelt))
27979 if ((nelt == 2) && (d->vmode != V2DImode))
27980 return false;
27981 else
27982 next = 0;
27985 if (d->perm[i] != next)
27986 return false;
27989 location = d->perm[0];
27991 switch (d->vmode)
27993 case V16QImode: gen = gen_neon_vextv16qi; break;
27994 case V8QImode: gen = gen_neon_vextv8qi; break;
27995 case V4HImode: gen = gen_neon_vextv4hi; break;
27996 case V8HImode: gen = gen_neon_vextv8hi; break;
27997 case V2SImode: gen = gen_neon_vextv2si; break;
27998 case V4SImode: gen = gen_neon_vextv4si; break;
27999 case V2SFmode: gen = gen_neon_vextv2sf; break;
28000 case V4SFmode: gen = gen_neon_vextv4sf; break;
28001 case V2DImode: gen = gen_neon_vextv2di; break;
28002 default:
28003 return false;
28006 /* Success! */
28007 if (d->testing_p)
28008 return true;
28010 offset = GEN_INT (location);
28011 emit_insn (gen (d->target, d->op0, d->op1, offset));
28012 return true;
28015 /* The NEON VTBL instruction is a fully variable permutation that's even
28016 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28017 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28018 can do slightly better by expanding this as a constant where we don't
28019 have to apply a mask. */
28021 static bool
28022 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28024 rtx rperm[MAX_VECT_LEN], sel;
28025 enum machine_mode vmode = d->vmode;
28026 unsigned int i, nelt = d->nelt;
28028 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28029 numbering of elements for big-endian, we must reverse the order. */
28030 if (BYTES_BIG_ENDIAN)
28031 return false;
28033 if (d->testing_p)
28034 return true;
28036 /* Generic code will try constant permutation twice: once with the
28037 original mode and again with the elements lowered to QImode.
28038 So wait, and don't do the selector expansion ourselves. */
28039 if (vmode != V8QImode && vmode != V16QImode)
28040 return false;
28042 for (i = 0; i < nelt; ++i)
28043 rperm[i] = GEN_INT (d->perm[i]);
28044 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28045 sel = force_reg (vmode, sel);
28047 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28048 return true;
28051 static bool
28052 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28054 /* Check if the input mask matches vext before reordering the
28055 operands. */
28056 if (TARGET_NEON)
28057 if (arm_evpc_neon_vext (d))
28058 return true;
28060 /* The pattern matching functions above are written to look for a small
28061 number to begin the sequence (0, 1, N/2). If we begin with an index
28062 from the second operand, we can swap the operands. */
28063 if (d->perm[0] >= d->nelt)
28065 unsigned i, nelt = d->nelt;
28066 rtx x;
28068 for (i = 0; i < nelt; ++i)
28069 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28071 x = d->op0;
28072 d->op0 = d->op1;
28073 d->op1 = x;
28076 if (TARGET_NEON)
28078 if (arm_evpc_neon_vuzp (d))
28079 return true;
28080 if (arm_evpc_neon_vzip (d))
28081 return true;
28082 if (arm_evpc_neon_vrev (d))
28083 return true;
28084 if (arm_evpc_neon_vtrn (d))
28085 return true;
28086 return arm_evpc_neon_vtbl (d);
28088 return false;
28091 /* Expand a vec_perm_const pattern. */
28093 bool
28094 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28096 struct expand_vec_perm_d d;
28097 int i, nelt, which;
28099 d.target = target;
28100 d.op0 = op0;
28101 d.op1 = op1;
28103 d.vmode = GET_MODE (target);
28104 gcc_assert (VECTOR_MODE_P (d.vmode));
28105 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28106 d.testing_p = false;
28108 for (i = which = 0; i < nelt; ++i)
28110 rtx e = XVECEXP (sel, 0, i);
28111 int ei = INTVAL (e) & (2 * nelt - 1);
28112 which |= (ei < nelt ? 1 : 2);
28113 d.perm[i] = ei;
28116 switch (which)
28118 default:
28119 gcc_unreachable();
28121 case 3:
28122 d.one_vector_p = false;
28123 if (!rtx_equal_p (op0, op1))
28124 break;
28126 /* The elements of PERM do not suggest that only the first operand
28127 is used, but both operands are identical. Allow easier matching
28128 of the permutation by folding the permutation into the single
28129 input vector. */
28130 /* FALLTHRU */
28131 case 2:
28132 for (i = 0; i < nelt; ++i)
28133 d.perm[i] &= nelt - 1;
28134 d.op0 = op1;
28135 d.one_vector_p = true;
28136 break;
28138 case 1:
28139 d.op1 = op0;
28140 d.one_vector_p = true;
28141 break;
28144 return arm_expand_vec_perm_const_1 (&d);
28147 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28149 static bool
28150 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
28151 const unsigned char *sel)
28153 struct expand_vec_perm_d d;
28154 unsigned int i, nelt, which;
28155 bool ret;
28157 d.vmode = vmode;
28158 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28159 d.testing_p = true;
28160 memcpy (d.perm, sel, nelt);
28162 /* Categorize the set of elements in the selector. */
28163 for (i = which = 0; i < nelt; ++i)
28165 unsigned char e = d.perm[i];
28166 gcc_assert (e < 2 * nelt);
28167 which |= (e < nelt ? 1 : 2);
28170 /* For all elements from the second vector, fold the elements to the first. */
28171 if (which == 2)
28172 for (i = 0; i < nelt; ++i)
28173 d.perm[i] -= nelt;
28175 /* Check whether the mask can be applied to the vector type. */
28176 d.one_vector_p = (which != 3);
28178 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28179 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28180 if (!d.one_vector_p)
28181 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28183 start_sequence ();
28184 ret = arm_expand_vec_perm_const_1 (&d);
28185 end_sequence ();
28187 return ret;
28190 bool
28191 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
28193 /* If we are soft float and we either have ldrd or the mode is no
28194 wider than a word, then all auto-increment forms are ok. */
28195 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28196 return true;
28198 switch (code)
28200 /* Post increment and Pre Decrement are supported for all
28201 instruction forms except for vector forms. */
28202 case ARM_POST_INC:
28203 case ARM_PRE_DEC:
28204 if (VECTOR_MODE_P (mode))
28206 if (code != ARM_PRE_DEC)
28207 return true;
28208 else
28209 return false;
28212 return true;
28214 case ARM_POST_DEC:
28215 case ARM_PRE_INC:
28216 /* Without LDRD, auto-incrementing a mode wider than
28217 word size is pointless, because ldm and stm will not
28218 have these forms. */
28219 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28220 return false;
28222 /* Vector and floating point modes do not support
28223 these auto increment forms. */
28224 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28225 return false;
28227 return true;
28229 default:
28230 return false;
28234 return false;
28237 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28238 on ARM, since we know that shifts by negative amounts are no-ops.
28239 Additionally, the default expansion code is not available or suitable
28240 for post-reload insn splits (this can occur when the register allocator
28241 chooses not to do a shift in NEON).
28243 This function is used in both initial expand and post-reload splits, and
28244 handles all kinds of 64-bit shifts.
28246 Input requirements:
28247 - It is safe for the input and output to be the same register, but
28248 early-clobber rules apply for the shift amount and scratch registers.
28249 - Shift by register requires both scratch registers. In all other cases
28250 the scratch registers may be NULL.
28251 - Ashiftrt by a register also clobbers the CC register. */
28252 void
28253 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28254 rtx amount, rtx scratch1, rtx scratch2)
28256 rtx out_high = gen_highpart (SImode, out);
28257 rtx out_low = gen_lowpart (SImode, out);
28258 rtx in_high = gen_highpart (SImode, in);
28259 rtx in_low = gen_lowpart (SImode, in);
28261 /* Terminology:
28262 in = the register pair containing the input value.
28263 out = the destination register pair.
28264 up = the high- or low-part of each pair.
28265 down = the opposite part to "up".
28266 In a shift, we can consider bits to shift from "up"-stream to
28267 "down"-stream, so in a left-shift "up" is the low-part and "down"
28268 is the high-part of each register pair. */
28270 rtx out_up = code == ASHIFT ? out_low : out_high;
28271 rtx out_down = code == ASHIFT ? out_high : out_low;
28272 rtx in_up = code == ASHIFT ? in_low : in_high;
28273 rtx in_down = code == ASHIFT ? in_high : in_low;
28275 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28276 gcc_assert (out
28277 && (REG_P (out) || GET_CODE (out) == SUBREG)
28278 && GET_MODE (out) == DImode);
28279 gcc_assert (in
28280 && (REG_P (in) || GET_CODE (in) == SUBREG)
28281 && GET_MODE (in) == DImode);
28282 gcc_assert (amount
28283 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28284 && GET_MODE (amount) == SImode)
28285 || CONST_INT_P (amount)));
28286 gcc_assert (scratch1 == NULL
28287 || (GET_CODE (scratch1) == SCRATCH)
28288 || (GET_MODE (scratch1) == SImode
28289 && REG_P (scratch1)));
28290 gcc_assert (scratch2 == NULL
28291 || (GET_CODE (scratch2) == SCRATCH)
28292 || (GET_MODE (scratch2) == SImode
28293 && REG_P (scratch2)));
28294 gcc_assert (!REG_P (out) || !REG_P (amount)
28295 || !HARD_REGISTER_P (out)
28296 || (REGNO (out) != REGNO (amount)
28297 && REGNO (out) + 1 != REGNO (amount)));
28299 /* Macros to make following code more readable. */
28300 #define SUB_32(DEST,SRC) \
28301 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28302 #define RSB_32(DEST,SRC) \
28303 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28304 #define SUB_S_32(DEST,SRC) \
28305 gen_addsi3_compare0 ((DEST), (SRC), \
28306 GEN_INT (-32))
28307 #define SET(DEST,SRC) \
28308 gen_rtx_SET (SImode, (DEST), (SRC))
28309 #define SHIFT(CODE,SRC,AMOUNT) \
28310 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28311 #define LSHIFT(CODE,SRC,AMOUNT) \
28312 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28313 SImode, (SRC), (AMOUNT))
28314 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28315 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28316 SImode, (SRC), (AMOUNT))
28317 #define ORR(A,B) \
28318 gen_rtx_IOR (SImode, (A), (B))
28319 #define BRANCH(COND,LABEL) \
28320 gen_arm_cond_branch ((LABEL), \
28321 gen_rtx_ ## COND (CCmode, cc_reg, \
28322 const0_rtx), \
28323 cc_reg)
28325 /* Shifts by register and shifts by constant are handled separately. */
28326 if (CONST_INT_P (amount))
28328 /* We have a shift-by-constant. */
28330 /* First, handle out-of-range shift amounts.
28331 In both cases we try to match the result an ARM instruction in a
28332 shift-by-register would give. This helps reduce execution
28333 differences between optimization levels, but it won't stop other
28334 parts of the compiler doing different things. This is "undefined
28335 behaviour", in any case. */
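/* For instance, an arithmetic right shift by 64 or more leaves both output
   words equal to in_up >> 31 (copies of the sign bit), while a left or
   logical right shift by 64 or more yields zero.  */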
28336 if (INTVAL (amount) <= 0)
28337 emit_insn (gen_movdi (out, in));
28338 else if (INTVAL (amount) >= 64)
28340 if (code == ASHIFTRT)
28342 rtx const31_rtx = GEN_INT (31);
28343 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28344 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28346 else
28347 emit_insn (gen_movdi (out, const0_rtx));
28350 /* Now handle valid shifts. */
28351 else if (INTVAL (amount) < 32)
28353 /* Shifts by a constant less than 32. */
28354 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28356 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28357 emit_insn (SET (out_down,
28358 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28359 out_down)));
28360 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28362 else
28364 /* Shifts by a constant greater than 31. */
28365 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28367 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28368 if (code == ASHIFTRT)
28369 emit_insn (gen_ashrsi3 (out_up, in_up,
28370 GEN_INT (31)));
28371 else
28372 emit_insn (SET (out_up, const0_rtx));
28375 else
28377 /* We have a shift-by-register. */
28378 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28380 /* This alternative requires the scratch registers. */
28381 gcc_assert (scratch1 && REG_P (scratch1));
28382 gcc_assert (scratch2 && REG_P (scratch2));
28384 /* We will need the values "amount-32" and "32-amount" later.
28385 Swapping them around now allows the later code to be more general. */
28386 switch (code)
28388 case ASHIFT:
28389 emit_insn (SUB_32 (scratch1, amount));
28390 emit_insn (RSB_32 (scratch2, amount));
28391 break;
28392 case ASHIFTRT:
28393 emit_insn (RSB_32 (scratch1, amount));
28394 /* Also set CC = amount > 32. */
28395 emit_insn (SUB_S_32 (scratch2, amount));
28396 break;
28397 case LSHIFTRT:
28398 emit_insn (RSB_32 (scratch1, amount));
28399 emit_insn (SUB_32 (scratch2, amount));
28400 break;
28401 default:
28402 gcc_unreachable ();
28405 /* Emit code like this:
28407 arithmetic-left:
28408 out_down = in_down << amount;
28409 out_down = (in_up << (amount - 32)) | out_down;
28410 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28411 out_up = in_up << amount;
28413 arithmetic-right:
28414 out_down = in_down >> amount;
28415 out_down = (in_up << (32 - amount)) | out_down;
28416 if (amount < 32)
28417 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28418 out_up = in_up << amount;
28420 logical-right:
28421 out_down = in_down >> amount;
28422 out_down = (in_up << (32 - amount)) | out_down;
28423 if (amount < 32)
28424 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28425 out_up = in_up << amount;
28427 The ARM and Thumb2 variants are the same but implemented slightly
28428 differently. If this were only called during expand we could just
28429 use the Thumb2 case and let combine do the right thing, but this
28430 can also be called from post-reload splitters. */
28432 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28434 if (!TARGET_THUMB2)
28436 /* Emit code for ARM mode. */
28437 emit_insn (SET (out_down,
28438 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28439 if (code == ASHIFTRT)
28441 rtx done_label = gen_label_rtx ();
28442 emit_jump_insn (BRANCH (LT, done_label));
28443 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28444 out_down)));
28445 emit_label (done_label);
28447 else
28448 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28449 out_down)));
28451 else
28453 /* Emit code for Thumb2 mode.
28454 Thumb2 can't do shift and or in one insn. */
28455 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28456 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28458 if (code == ASHIFTRT)
28460 rtx done_label = gen_label_rtx ();
28461 emit_jump_insn (BRANCH (LT, done_label));
28462 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28463 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28464 emit_label (done_label);
28466 else
28468 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28469 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28473 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28476 #undef SUB_32
28477 #undef RSB_32
28478 #undef SUB_S_32
28479 #undef SET
28480 #undef SHIFT
28481 #undef LSHIFT
28482 #undef REV_LSHIFT
28483 #undef ORR
28484 #undef BRANCH
28488 /* Return true if *COMPARISON is a comparison operation that the target
28489 supports, and put its operands into a form that is valid for it. */
28490 bool
28491 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28493 enum rtx_code code = GET_CODE (*comparison);
28494 int code_int;
28495 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28496 ? GET_MODE (*op2) : GET_MODE (*op1);
28498 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28500 if (code == UNEQ || code == LTGT)
28501 return false;
28503 code_int = (int)code;
28504 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28505 PUT_CODE (*comparison, (enum rtx_code)code_int);
28507 switch (mode)
28509 case SImode:
28510 if (!arm_add_operand (*op1, mode))
28511 *op1 = force_reg (mode, *op1);
28512 if (!arm_add_operand (*op2, mode))
28513 *op2 = force_reg (mode, *op2);
28514 return true;
28516 case DImode:
28517 if (!cmpdi_operand (*op1, mode))
28518 *op1 = force_reg (mode, *op1);
28519 if (!cmpdi_operand (*op2, mode))
28520 *op2 = force_reg (mode, *op2);
28521 return true;
28523 case SFmode:
28524 case DFmode:
28525 if (!arm_float_compare_operand (*op1, mode))
28526 *op1 = force_reg (mode, *op1);
28527 if (!arm_float_compare_operand (*op2, mode))
28528 *op2 = force_reg (mode, *op2);
28529 return true;
28530 default:
28531 break;
28534 return false;
28538 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28540 static unsigned HOST_WIDE_INT
28541 arm_asan_shadow_offset (void)
28543 return (unsigned HOST_WIDE_INT) 1 << 29;
28546 /* Return TRUE if X is a reference to a value in a constant pool. */
28547 extern bool
28548 arm_is_constant_pool_ref (rtx x)
28550 return (MEM_P (x)
28551 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
28552 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
28555 #include "gt-arm.h"