1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
59 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix;
63 void (*arm_lang_output_object_attributes_hook)(void);
65 struct four_ints
67 int i[4];
70 /* Forward function declarations. */
71 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
72 static int arm_compute_static_chain_stack_bytes (void);
73 static arm_stack_offsets *arm_get_frame_offsets (void);
74 static void arm_add_gc_roots (void);
75 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
76 HOST_WIDE_INT, rtx, rtx, int, int);
77 static unsigned bit_count (unsigned long);
78 static int arm_address_register_rtx_p (rtx, int);
79 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
80 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
81 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
82 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
83 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
84 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
85 inline static int thumb1_index_register_rtx_p (rtx, int);
86 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
87 static int thumb_far_jump_used_p (void);
88 static bool thumb_force_lr_save (void);
89 static unsigned arm_size_return_regs (void);
90 static bool arm_assemble_integer (rtx, unsigned int, int);
91 static void arm_print_operand (FILE *, rtx, int);
92 static void arm_print_operand_address (FILE *, rtx);
93 static bool arm_print_operand_punct_valid_p (unsigned char code);
94 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
95 static arm_cc get_arm_condition_code (rtx);
96 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
97 static rtx is_jump_table (rtx);
98 static const char *output_multi_immediate (rtx *, const char *, const char *,
99 int, HOST_WIDE_INT);
100 static const char *shift_op (rtx, HOST_WIDE_INT *);
101 static struct machine_function *arm_init_machine_status (void);
102 static void thumb_exit (FILE *, int);
103 static rtx is_jump_table (rtx);
104 static HOST_WIDE_INT get_jump_table_size (rtx);
105 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_forward_ref (Mfix *);
107 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
108 static Mnode *add_minipool_backward_ref (Mfix *);
109 static void assign_minipool_offsets (Mfix *);
110 static void arm_print_value (FILE *, rtx);
111 static void dump_minipool (rtx);
112 static int arm_barrier_cost (rtx);
113 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
114 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
115 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
116 rtx);
117 static void arm_reorg (void);
118 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
119 static unsigned long arm_compute_save_reg0_reg12_mask (void);
120 static unsigned long arm_compute_save_reg_mask (void);
121 static unsigned long arm_isr_value (tree);
122 static unsigned long arm_compute_func_type (void);
123 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
124 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
126 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
127 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
128 #endif
129 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
130 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
131 static int arm_comp_type_attributes (const_tree, const_tree);
132 static void arm_set_default_type_attributes (tree);
133 static int arm_adjust_cost (rtx, rtx, rtx, int);
134 static int optimal_immediate_sequence (enum rtx_code code,
135 unsigned HOST_WIDE_INT val,
136 struct four_ints *return_sequence);
137 static int optimal_immediate_sequence_1 (enum rtx_code code,
138 unsigned HOST_WIDE_INT val,
139 struct four_ints *return_sequence,
140 int i);
141 static int arm_get_strip_length (int);
142 static bool arm_function_ok_for_sibcall (tree, tree);
143 static enum machine_mode arm_promote_function_mode (const_tree,
144 enum machine_mode, int *,
145 const_tree, int);
146 static bool arm_return_in_memory (const_tree, const_tree);
147 static rtx arm_function_value (const_tree, const_tree, bool);
148 static rtx arm_libcall_value_1 (enum machine_mode);
149 static rtx arm_libcall_value (enum machine_mode, const_rtx);
150 static bool arm_function_value_regno_p (const unsigned int);
151 static void arm_internal_label (FILE *, const char *, unsigned long);
152 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
153 tree);
154 static bool arm_have_conditional_execution (void);
155 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
156 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
157 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
158 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
159 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
160 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
164 static int arm_address_cost (rtx, bool);
165 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
166 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
167 static void arm_init_builtins (void);
168 static void arm_init_iwmmxt_builtins (void);
169 static rtx safe_vector_operand (rtx, enum machine_mode);
170 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
171 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
172 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
173 static tree arm_builtin_decl (unsigned, bool);
174 static void emit_constant_insn (rtx cond, rtx pattern);
175 static rtx emit_set_insn (rtx, rtx);
176 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
177 tree, bool);
178 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
179 const_tree, bool);
180 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
181 const_tree, bool);
182 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
183 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
184 const_tree);
185 static rtx aapcs_libcall_value (enum machine_mode);
186 static int aapcs_select_return_coproc (const_tree, const_tree);
188 #ifdef OBJECT_FORMAT_ELF
189 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
190 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
191 #endif
192 #ifndef ARM_PE
193 static void arm_encode_section_info (tree, rtx, int);
194 #endif
196 static void arm_file_end (void);
197 static void arm_file_start (void);
199 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
200 tree, int *, int);
201 static bool arm_pass_by_reference (cumulative_args_t,
202 enum machine_mode, const_tree, bool);
203 static bool arm_promote_prototypes (const_tree);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree);
207 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
208 static bool arm_return_in_memory (const_tree, const_tree);
209 #if ARM_UNWIND_INFO
210 static void arm_unwind_emit (FILE *, rtx);
211 static bool arm_output_ttype (rtx);
212 static void arm_asm_emit_except_personality (rtx);
213 static void arm_asm_init_sections (void);
214 #endif
215 static rtx arm_dwarf_register_span (rtx);
217 static tree arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree arm_get_cookie_size (tree);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree, rtx);
229 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
230 static void arm_option_override (void);
231 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
232 static bool arm_cannot_copy_insn_p (rtx);
233 static bool arm_tls_symbol_p (rtx x);
234 static int arm_issue_rate (void);
235 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
236 static bool arm_output_addr_const_extra (FILE *, rtx);
237 static bool arm_allocate_stack_slots_for_args (void);
238 static const char *arm_invalid_parameter_type (const_tree t);
239 static const char *arm_invalid_return_type (const_tree t);
240 static tree arm_promoted_type (const_tree t);
241 static tree arm_convert_to_type (tree type, tree expr);
242 static bool arm_scalar_mode_supported_p (enum machine_mode);
243 static bool arm_frame_pointer_required (void);
244 static bool arm_can_eliminate (const int, const int);
245 static void arm_asm_trampoline_template (FILE *);
246 static void arm_trampoline_init (rtx, tree, rtx);
247 static rtx arm_trampoline_adjust_address (rtx);
248 static rtx arm_pic_static_addr (rtx orig, rtx reg);
249 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
250 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
251 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool arm_array_mode_supported_p (enum machine_mode,
253 unsigned HOST_WIDE_INT);
254 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
255 static bool arm_class_likely_spilled_p (reg_class_t);
256 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
257 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
258 const_tree type,
259 int misalignment,
260 bool is_packed);
261 static void arm_conditional_register_usage (void);
262 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
263 static unsigned int arm_autovectorize_vector_sizes (void);
264 static int arm_default_branch_cost (bool, bool);
265 static int arm_cortex_a5_branch_cost (bool, bool);
267 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
268 const unsigned char *sel);
271 /* Table of machine attributes. */
272 static const struct attribute_spec arm_attribute_table[] =
274 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
275 affects_type_identity } */
276 /* Function calls made to this symbol must be done indirectly, because
277 it may lie outside of the 26 bit addressing range of a normal function
278 call. */
279 { "long_call", 0, 0, false, true, true, NULL, false },
280 /* Whereas these functions are always known to reside within the 26 bit
281 addressing range. */
282 { "short_call", 0, 0, false, true, true, NULL, false },
283 /* Specify the procedure call conventions for a function. */
284 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
285 false },
286 /* Interrupt Service Routines have special prologue and epilogue requirements. */
287 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
288 false },
289 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
290 false },
291 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
292 false },
293 #ifdef ARM_PE
294 /* ARM/PE has three new attributes:
295 interfacearm - ?
296 dllexport - for exporting a function/variable that will live in a dll
297 dllimport - for importing a function/variable from a dll
299 Microsoft allows multiple declspecs in one __declspec, separating
300 them with spaces. We do NOT support this. Instead, use __declspec
301 multiple times.
303 { "dllimport", 0, 0, true, false, false, NULL, false },
304 { "dllexport", 0, 0, true, false, false, NULL, false },
305 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
306 false },
307 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
308 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
309 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
310 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
311 false },
312 #endif
313 { NULL, 0, 0, false, false, false, NULL, false }
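/* Usage illustration (editorial addition, not from the original file):
   declarations such as

     void far_handler (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   pick up the "long_call" and "interrupt"/"isr" entries above, and the pcs
   attribute takes its single string argument, e.g.
   __attribute__ ((pcs ("aapcs"))).  */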
316 /* Initialize the GCC target structure. */
317 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
318 #undef TARGET_MERGE_DECL_ATTRIBUTES
319 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
320 #endif
322 #undef TARGET_LEGITIMIZE_ADDRESS
323 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
325 #undef TARGET_ATTRIBUTE_TABLE
326 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
328 #undef TARGET_ASM_FILE_START
329 #define TARGET_ASM_FILE_START arm_file_start
330 #undef TARGET_ASM_FILE_END
331 #define TARGET_ASM_FILE_END arm_file_end
333 #undef TARGET_ASM_ALIGNED_SI_OP
334 #define TARGET_ASM_ALIGNED_SI_OP NULL
335 #undef TARGET_ASM_INTEGER
336 #define TARGET_ASM_INTEGER arm_assemble_integer
338 #undef TARGET_PRINT_OPERAND
339 #define TARGET_PRINT_OPERAND arm_print_operand
340 #undef TARGET_PRINT_OPERAND_ADDRESS
341 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
342 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
343 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
345 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
346 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
348 #undef TARGET_ASM_FUNCTION_PROLOGUE
349 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
351 #undef TARGET_ASM_FUNCTION_EPILOGUE
352 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
354 #undef TARGET_OPTION_OVERRIDE
355 #define TARGET_OPTION_OVERRIDE arm_option_override
357 #undef TARGET_COMP_TYPE_ATTRIBUTES
358 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
360 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
361 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
363 #undef TARGET_SCHED_ADJUST_COST
364 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
366 #undef TARGET_REGISTER_MOVE_COST
367 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
369 #undef TARGET_MEMORY_MOVE_COST
370 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
372 #undef TARGET_ENCODE_SECTION_INFO
373 #ifdef ARM_PE
374 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
375 #else
376 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
377 #endif
379 #undef TARGET_STRIP_NAME_ENCODING
380 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
382 #undef TARGET_ASM_INTERNAL_LABEL
383 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
385 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
386 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
388 #undef TARGET_FUNCTION_VALUE
389 #define TARGET_FUNCTION_VALUE arm_function_value
391 #undef TARGET_LIBCALL_VALUE
392 #define TARGET_LIBCALL_VALUE arm_libcall_value
394 #undef TARGET_FUNCTION_VALUE_REGNO_P
395 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
397 #undef TARGET_ASM_OUTPUT_MI_THUNK
398 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
399 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
400 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
402 #undef TARGET_RTX_COSTS
403 #define TARGET_RTX_COSTS arm_rtx_costs
404 #undef TARGET_ADDRESS_COST
405 #define TARGET_ADDRESS_COST arm_address_cost
407 #undef TARGET_SHIFT_TRUNCATION_MASK
408 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
409 #undef TARGET_VECTOR_MODE_SUPPORTED_P
410 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
411 #undef TARGET_ARRAY_MODE_SUPPORTED_P
412 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
413 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
414 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
415 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
416 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
417 arm_autovectorize_vector_sizes
419 #undef TARGET_MACHINE_DEPENDENT_REORG
420 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
422 #undef TARGET_INIT_BUILTINS
423 #define TARGET_INIT_BUILTINS arm_init_builtins
424 #undef TARGET_EXPAND_BUILTIN
425 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
426 #undef TARGET_BUILTIN_DECL
427 #define TARGET_BUILTIN_DECL arm_builtin_decl
429 #undef TARGET_INIT_LIBFUNCS
430 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
432 #undef TARGET_PROMOTE_FUNCTION_MODE
433 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
434 #undef TARGET_PROMOTE_PROTOTYPES
435 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
436 #undef TARGET_PASS_BY_REFERENCE
437 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
440 #undef TARGET_FUNCTION_ARG
441 #define TARGET_FUNCTION_ARG arm_function_arg
442 #undef TARGET_FUNCTION_ARG_ADVANCE
443 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
444 #undef TARGET_FUNCTION_ARG_BOUNDARY
445 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
447 #undef TARGET_SETUP_INCOMING_VARARGS
448 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
450 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
451 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
453 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
454 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
455 #undef TARGET_TRAMPOLINE_INIT
456 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
457 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
458 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
460 #undef TARGET_DEFAULT_SHORT_ENUMS
461 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
463 #undef TARGET_ALIGN_ANON_BITFIELD
464 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
466 #undef TARGET_NARROW_VOLATILE_BITFIELD
467 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
469 #undef TARGET_CXX_GUARD_TYPE
470 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
472 #undef TARGET_CXX_GUARD_MASK_BIT
473 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
475 #undef TARGET_CXX_GET_COOKIE_SIZE
476 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
478 #undef TARGET_CXX_COOKIE_HAS_SIZE
479 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
481 #undef TARGET_CXX_CDTOR_RETURNS_THIS
482 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
484 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
485 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
487 #undef TARGET_CXX_USE_AEABI_ATEXIT
488 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
490 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
491 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
492 arm_cxx_determine_class_data_visibility
494 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
495 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
497 #undef TARGET_RETURN_IN_MSB
498 #define TARGET_RETURN_IN_MSB arm_return_in_msb
500 #undef TARGET_RETURN_IN_MEMORY
501 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
503 #undef TARGET_MUST_PASS_IN_STACK
504 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
506 #if ARM_UNWIND_INFO
507 #undef TARGET_ASM_UNWIND_EMIT
508 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
510 /* EABI unwinding tables use a different format for the typeinfo tables. */
511 #undef TARGET_ASM_TTYPE
512 #define TARGET_ASM_TTYPE arm_output_ttype
514 #undef TARGET_ARM_EABI_UNWINDER
515 #define TARGET_ARM_EABI_UNWINDER true
517 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
518 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
520 #undef TARGET_ASM_INIT_SECTIONS
521 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
522 #endif /* ARM_UNWIND_INFO */
524 #undef TARGET_DWARF_REGISTER_SPAN
525 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
527 #undef TARGET_CANNOT_COPY_INSN_P
528 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
530 #ifdef HAVE_AS_TLS
531 #undef TARGET_HAVE_TLS
532 #define TARGET_HAVE_TLS true
533 #endif
535 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
536 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
538 #undef TARGET_LEGITIMATE_CONSTANT_P
539 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
541 #undef TARGET_CANNOT_FORCE_CONST_MEM
542 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
544 #undef TARGET_MAX_ANCHOR_OFFSET
545 #define TARGET_MAX_ANCHOR_OFFSET 4095
547 /* The minimum is set such that the total size of the block
548 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
549 divisible by eight, ensuring natural spacing of anchors. */
550 #undef TARGET_MIN_ANCHOR_OFFSET
551 #define TARGET_MIN_ANCHOR_OFFSET -4088
553 #undef TARGET_SCHED_ISSUE_RATE
554 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
556 #undef TARGET_MANGLE_TYPE
557 #define TARGET_MANGLE_TYPE arm_mangle_type
559 #undef TARGET_BUILD_BUILTIN_VA_LIST
560 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
561 #undef TARGET_EXPAND_BUILTIN_VA_START
562 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
563 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
564 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
566 #ifdef HAVE_AS_TLS
567 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
568 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
569 #endif
571 #undef TARGET_LEGITIMATE_ADDRESS_P
572 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
574 #undef TARGET_PREFERRED_RELOAD_CLASS
575 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
577 #undef TARGET_INVALID_PARAMETER_TYPE
578 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
580 #undef TARGET_INVALID_RETURN_TYPE
581 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
583 #undef TARGET_PROMOTED_TYPE
584 #define TARGET_PROMOTED_TYPE arm_promoted_type
586 #undef TARGET_CONVERT_TO_TYPE
587 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
589 #undef TARGET_SCALAR_MODE_SUPPORTED_P
590 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
592 #undef TARGET_FRAME_POINTER_REQUIRED
593 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
595 #undef TARGET_CAN_ELIMINATE
596 #define TARGET_CAN_ELIMINATE arm_can_eliminate
598 #undef TARGET_CONDITIONAL_REGISTER_USAGE
599 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
601 #undef TARGET_CLASS_LIKELY_SPILLED_P
602 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
604 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
605 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
606 arm_vector_alignment_reachable
608 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
609 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
610 arm_builtin_support_vector_misalignment
612 #undef TARGET_PREFERRED_RENAME_CLASS
613 #define TARGET_PREFERRED_RENAME_CLASS \
614 arm_preferred_rename_class
616 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
617 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
618 arm_vectorize_vec_perm_const_ok
620 struct gcc_target targetm = TARGET_INITIALIZER;
622 /* Obstack for minipool constant handling. */
623 static struct obstack minipool_obstack;
624 static char * minipool_startobj;
626 /* The maximum number of insns skipped which
627 will be conditionalised if possible. */
628 static int max_insns_skipped = 5;
630 extern FILE * asm_out_file;
632 /* True if we are currently building a constant table. */
633 int making_const_table;
635 /* The processor for which instructions should be scheduled. */
636 enum processor_type arm_tune = arm_none;
638 /* The current tuning set. */
639 const struct tune_params *current_tune;
641 /* Which floating point hardware to schedule for. */
642 int arm_fpu_attr;
644 /* Which floating point hardware to use. */
645 const struct arm_fpu_desc *arm_fpu_desc;
647 /* Used for Thumb call_via trampolines. */
648 rtx thumb_call_via_label[14];
649 static int thumb_call_reg_needed;
651 /* Bit values used to identify processor capabilities. */
652 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
653 #define FL_ARCH3M (1 << 1) /* Extended multiply */
654 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
655 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
656 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
657 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
658 #define FL_THUMB (1 << 6) /* Thumb aware */
659 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
660 #define FL_STRONG (1 << 8) /* StrongARM */
661 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
662 #define FL_XSCALE (1 << 10) /* XScale */
663 /* spare (1 << 11) */
664 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
665 media instructions. */
666 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
667 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
668 Note: ARM6 & 7 derivatives only. */
669 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
670 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
671 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
672 profile. */
673 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
674 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
675 #define FL_NEON (1 << 20) /* Neon instructions. */
676 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
677 architecture. */
678 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
679 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
681 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
682 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
684 /* Flags that only affect tuning, not available instructions. */
685 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
686 | FL_CO_PROC)
688 #define FL_FOR_ARCH2 FL_NOTM
689 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
690 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
691 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
692 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
693 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
694 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
695 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
696 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
697 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
698 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
699 #define FL_FOR_ARCH6J FL_FOR_ARCH6
700 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
701 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
702 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
703 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
704 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
705 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
706 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
707 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
708 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
709 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
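/* Illustrative expansion (editorial addition, not in the original source):
   the FL_FOR_ARCH* macros accumulate capabilities down the architecture
   tree.  For example, FL_FOR_ARCH7M unfolds to

     (((FL_FOR_ARCH6 | FL_THUMB2) & ~FL_NOTM) | FL_ARCH7) | FL_THUMB_DIV

   i.e. everything an ARMv6T2 core provides, minus the non-M-profile
   instructions (FL_NOTM), plus the v7 base and Thumb hardware divide.  */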
711 /* The bits in this mask specify which
712 instructions we are allowed to generate. */
713 static unsigned long insn_flags = 0;
715 /* The bits in this mask specify which instruction scheduling options should
716 be used. */
717 static unsigned long tune_flags = 0;
719 /* The highest ARM architecture version supported by the
720 target. */
721 enum base_architecture arm_base_arch = BASE_ARCH_0;
723 /* The following are used in the arm.md file as equivalents to bits
724 in the above two flag variables. */
726 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
727 int arm_arch3m = 0;
729 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
730 int arm_arch4 = 0;
732 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
733 int arm_arch4t = 0;
735 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
736 int arm_arch5 = 0;
738 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
739 int arm_arch5e = 0;
741 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
742 int arm_arch6 = 0;
744 /* Nonzero if this chip supports the ARM 6K extensions. */
745 int arm_arch6k = 0;
747 /* Nonzero if this chip supports the ARM 7 extensions. */
748 int arm_arch7 = 0;
750 /* Nonzero if instructions not present in the 'M' profile can be used. */
751 int arm_arch_notm = 0;
753 /* Nonzero if instructions present in ARMv7E-M can be used. */
754 int arm_arch7em = 0;
756 /* Nonzero if this chip can benefit from load scheduling. */
757 int arm_ld_sched = 0;
759 /* Nonzero if this chip is a StrongARM. */
760 int arm_tune_strongarm = 0;
762 /* Nonzero if this chip supports Intel Wireless MMX technology. */
763 int arm_arch_iwmmxt = 0;
765 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
766 int arm_arch_iwmmxt2 = 0;
768 /* Nonzero if this chip is an XScale. */
769 int arm_arch_xscale = 0;
771 /* Nonzero if tuning for XScale. */
772 int arm_tune_xscale = 0;
774 /* Nonzero if we want to tune for stores that access the write-buffer.
775 This typically means an ARM6 or ARM7 with MMU or MPU. */
776 int arm_tune_wbuf = 0;
778 /* Nonzero if tuning for Cortex-A9. */
779 int arm_tune_cortex_a9 = 0;
781 /* Nonzero if generating Thumb instructions. */
782 int thumb_code = 0;
784 /* Nonzero if generating Thumb-1 instructions. */
785 int thumb1_code = 0;
787 /* Nonzero if we should define __THUMB_INTERWORK__ in the
788 preprocessor.
789 XXX This is a bit of a hack, it's intended to help work around
790 problems in GLD which doesn't understand that armv5t code is
791 interworking clean. */
792 int arm_cpp_interwork = 0;
794 /* Nonzero if chip supports Thumb 2. */
795 int arm_arch_thumb2;
797 /* Nonzero if chip supports integer division instruction. */
798 int arm_arch_arm_hwdiv;
799 int arm_arch_thumb_hwdiv;
801 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
802 we must report the mode of the memory reference from
803 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
804 enum machine_mode output_memory_reference_mode;
806 /* The register number to be used for the PIC offset register. */
807 unsigned arm_pic_register = INVALID_REGNUM;
809 /* Set to 1 after arm_reorg has started. Reset to start at the start of
810 the next function. */
811 static int after_arm_reorg = 0;
813 enum arm_pcs arm_pcs_default;
815 /* For an explanation of these variables, see final_prescan_insn below. */
816 int arm_ccfsm_state;
817 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
818 enum arm_cond_code arm_current_cc;
820 rtx arm_target_insn;
821 int arm_target_label;
822 /* The number of conditionally executed insns, including the current insn. */
823 int arm_condexec_count = 0;
824 /* A bitmask specifying the patterns for the IT block.
825 Zero means do not output an IT block before this insn. */
826 int arm_condexec_mask = 0;
827 /* The number of bits used in arm_condexec_mask. */
828 int arm_condexec_masklen = 0;
830 /* The condition codes of the ARM, and the inverse function. */
831 static const char * const arm_condition_codes[] =
833 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
834 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
837 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
838 int arm_regs_in_sequence[] =
840 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
843 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
844 #define streq(string1, string2) (strcmp (string1, string2) == 0)
846 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
847 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
848 | (1 << PIC_OFFSET_TABLE_REGNUM)))
850 /* Initialization code. */
852 struct processors
854 const char *const name;
855 enum processor_type core;
856 const char *arch;
857 enum base_architecture base_arch;
858 const unsigned long flags;
859 const struct tune_params *const tune;
863 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
864 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
865 prefetch_slots, \
866 l1_size, \
867 l1_line_size
869 const struct tune_params arm_slowmul_tune =
871 arm_slowmul_rtx_costs,
872 NULL,
873 3, /* Constant limit. */
874 5, /* Max cond insns. */
875 ARM_PREFETCH_NOT_BENEFICIAL,
876 true, /* Prefer constant pool. */
877 arm_default_branch_cost
880 const struct tune_params arm_fastmul_tune =
882 arm_fastmul_rtx_costs,
883 NULL,
884 1, /* Constant limit. */
885 5, /* Max cond insns. */
886 ARM_PREFETCH_NOT_BENEFICIAL,
887 true, /* Prefer constant pool. */
888 arm_default_branch_cost
891 /* StrongARM has early execution of branches, so a sequence that is worth
892 skipping is shorter. Set max_insns_skipped to a lower value. */
894 const struct tune_params arm_strongarm_tune =
896 arm_fastmul_rtx_costs,
897 NULL,
898 1, /* Constant limit. */
899 3, /* Max cond insns. */
900 ARM_PREFETCH_NOT_BENEFICIAL,
901 true, /* Prefer constant pool. */
902 arm_default_branch_cost
905 const struct tune_params arm_xscale_tune =
907 arm_xscale_rtx_costs,
908 xscale_sched_adjust_cost,
909 2, /* Constant limit. */
910 3, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL,
912 true, /* Prefer constant pool. */
913 arm_default_branch_cost
916 const struct tune_params arm_9e_tune =
918 arm_9e_rtx_costs,
919 NULL,
920 1, /* Constant limit. */
921 5, /* Max cond insns. */
922 ARM_PREFETCH_NOT_BENEFICIAL,
923 true, /* Prefer constant pool. */
924 arm_default_branch_cost
927 const struct tune_params arm_v6t2_tune =
929 arm_9e_rtx_costs,
930 NULL,
931 1, /* Constant limit. */
932 5, /* Max cond insns. */
933 ARM_PREFETCH_NOT_BENEFICIAL,
934 false, /* Prefer constant pool. */
935 arm_default_branch_cost
938 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
939 const struct tune_params arm_cortex_tune =
941 arm_9e_rtx_costs,
942 NULL,
943 1, /* Constant limit. */
944 5, /* Max cond insns. */
945 ARM_PREFETCH_NOT_BENEFICIAL,
946 false, /* Prefer constant pool. */
947 arm_default_branch_cost
950 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
951 less appealing. Set max_insns_skipped to a low value. */
953 const struct tune_params arm_cortex_a5_tune =
955 arm_9e_rtx_costs,
956 NULL,
957 1, /* Constant limit. */
958 1, /* Max cond insns. */
959 ARM_PREFETCH_NOT_BENEFICIAL,
960 false, /* Prefer constant pool. */
961 arm_cortex_a5_branch_cost
964 const struct tune_params arm_cortex_a9_tune =
966 arm_9e_rtx_costs,
967 cortex_a9_sched_adjust_cost,
968 1, /* Constant limit. */
969 5, /* Max cond insns. */
970 ARM_PREFETCH_BENEFICIAL(4,32,32),
971 false, /* Prefer constant pool. */
972 arm_default_branch_cost
975 const struct tune_params arm_fa726te_tune =
977 arm_9e_rtx_costs,
978 fa726te_sched_adjust_cost,
979 1, /* Constant limit. */
980 5, /* Max cond insns. */
981 ARM_PREFETCH_NOT_BENEFICIAL,
982 true, /* Prefer constant pool. */
983 arm_default_branch_cost
987 /* Not all of these give usefully different compilation alternatives,
988 but there is no simple way of generalizing them. */
989 static const struct processors all_cores[] =
991 /* ARM Cores */
992 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
993 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
994 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
995 #include "arm-cores.def"
996 #undef ARM_CORE
997 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
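/* Worked example (hypothetical entry, for illustration only): an
   arm-cores.def line such as

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand under the ARM_CORE macro above to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   so each core row carries its own flags plus everything implied by its
   architecture, and points at one of the tune_params tables defined above.  */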
1000 static const struct processors all_architectures[] =
1002 /* ARM Architectures */
1003 /* We don't specify tuning costs here as it will be figured out
1004 from the core. */
1006 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1007 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1008 #include "arm-arches.def"
1009 #undef ARM_ARCH
1010 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1014 /* These are populated as commandline arguments are processed, or NULL
1015 if not specified. */
1016 static const struct processors *arm_selected_arch;
1017 static const struct processors *arm_selected_cpu;
1018 static const struct processors *arm_selected_tune;
1020 /* The name of the preprocessor macro to define for this architecture. */
1022 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1024 /* Available values for -mfpu=. */
1026 static const struct arm_fpu_desc all_fpus[] =
1028 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1029 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1030 #include "arm-fpus.def"
1031 #undef ARM_FPU
1035 /* Supported TLS relocations. */
1037 enum tls_reloc {
1038 TLS_GD32,
1039 TLS_LDM32,
1040 TLS_LDO32,
1041 TLS_IE32,
1042 TLS_LE32,
1043 TLS_DESCSEQ /* GNU scheme */
1046 /* The maximum number of insns to be used when loading a constant. */
1047 inline static int
1048 arm_constant_limit (bool size_p)
1050 return size_p ? 1 : current_tune->constant_limit;
1053 /* Emit an insn that's a simple single-set. Both the operands must be known
1054 to be valid. */
1055 inline static rtx
1056 emit_set_insn (rtx x, rtx y)
1058 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1061 /* Return the number of bits set in VALUE. */
1062 static unsigned
1063 bit_count (unsigned long value)
1065 unsigned long count = 0;
1067 while (value)
1069 count++;
1070 value &= value - 1; /* Clear the least-significant set bit. */
1073 return count;
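/* Worked example (editorial addition): for value = 0b101100 the loop clears
   the lowest set bit on each iteration, 0b101100 -> 0b101000 -> 0b100000 -> 0,
   so the function returns 3 after exactly one iteration per set bit
   (Kernighan's population-count trick).  */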
1076 typedef struct
1078 enum machine_mode mode;
1079 const char *name;
1080 } arm_fixed_mode_set;
1082 /* A small helper for setting fixed-point library libfuncs. */
1084 static void
1085 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1086 const char *funcname, const char *modename,
1087 int num_suffix)
1089 char buffer[50];
1091 if (num_suffix == 0)
1092 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1093 else
1094 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1096 set_optab_libfunc (optable, mode, buffer);
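/* Worked example (editorial addition): a call such as
   arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3) builds the
   name "__gnu_addqq3" and registers it for QQmode addition; with a
   num_suffix of 0 the trailing digit is simply omitted.  */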
1099 static void
1100 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1101 enum machine_mode from, const char *funcname,
1102 const char *toname, const char *fromname)
1104 char buffer[50];
1105 const char *maybe_suffix_2 = "";
1107 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1108 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1109 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1110 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1111 maybe_suffix_2 = "2";
1113 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1114 maybe_suffix_2);
1116 set_conv_libfunc (optable, to, from, buffer);
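/* Worked example (editorial addition): converting between two signed fract
   modes, e.g. from QQmode ("qq") to HQmode ("hq"), satisfies the test above,
   so the generated name is "__gnu_fractqqhq2"; a conversion involving a
   non-fixed-point mode such as SFmode gets no "2" suffix, e.g.
   "__gnu_fractsfqq".  */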
1119 /* Set up library functions unique to ARM. */
1121 static void
1122 arm_init_libfuncs (void)
1124 /* For Linux, we have access to kernel support for atomic operations. */
1125 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1126 init_sync_libfuncs (2 * UNITS_PER_WORD);
1128 /* There are no special library functions unless we are using the
1129 ARM BPABI. */
1130 if (!TARGET_BPABI)
1131 return;
1133 /* The functions below are described in Section 4 of the "Run-Time
1134 ABI for the ARM architecture", Version 1.0. */
1136 /* Double-precision floating-point arithmetic. Table 2. */
1137 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1138 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1139 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1140 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1141 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1143 /* Double-precision comparisons. Table 3. */
1144 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1145 set_optab_libfunc (ne_optab, DFmode, NULL);
1146 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1147 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1148 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1149 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1150 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1152 /* Single-precision floating-point arithmetic. Table 4. */
1153 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1154 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1155 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1156 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1157 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1159 /* Single-precision comparisons. Table 5. */
1160 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1161 set_optab_libfunc (ne_optab, SFmode, NULL);
1162 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1163 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1164 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1165 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1166 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1168 /* Floating-point to integer conversions. Table 6. */
1169 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1170 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1171 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1172 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1173 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1174 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1175 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1176 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1178 /* Conversions between floating types. Table 7. */
1179 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1180 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1182 /* Integer to floating-point conversions. Table 8. */
1183 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1184 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1185 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1186 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1187 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1188 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1189 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1190 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1192 /* Long long. Table 9. */
1193 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1194 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1195 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1196 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1197 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1198 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1199 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1200 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1202 /* Integer (32/32->32) division. \S 4.3.1. */
1203 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1204 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1206 /* The divmod functions are designed so that they can be used for
1207 plain division, even though they return both the quotient and the
1208 remainder. The quotient is returned in the usual location (i.e.,
1209 r0 for SImode, {r0, r1} for DImode), just as would be expected
1210 for an ordinary division routine. Because the AAPCS calling
1211 conventions specify that all of { r0, r1, r2, r3 } are
1212 call-clobbered registers, there is no need to tell the compiler
1213 explicitly that those registers are clobbered by these
1214 routines. */
1215 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1216 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
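/* Editorial note (assumption based on the ARM Run-time ABI, not original
   text): __aeabi_idivmod returns the quotient in r0 and the remainder in r1,
   and __aeabi_ldivmod returns them in {r0, r1} and {r2, r3} respectively,
   which is why the same entry points can back both the divmod and the plain
   division optabs: a plain division simply ignores the remainder
   registers.  */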
1218 /* For SImode division the ABI provides div-without-mod routines,
1219 which are faster. */
1220 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1221 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1223 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1224 divmod libcalls instead. */
1225 set_optab_libfunc (smod_optab, DImode, NULL);
1226 set_optab_libfunc (umod_optab, DImode, NULL);
1227 set_optab_libfunc (smod_optab, SImode, NULL);
1228 set_optab_libfunc (umod_optab, SImode, NULL);
1230 /* Half-precision float operations. The compiler handles all operations
1231 with NULL libfuncs by converting to SFmode. */
1232 switch (arm_fp16_format)
1234 case ARM_FP16_FORMAT_IEEE:
1235 case ARM_FP16_FORMAT_ALTERNATIVE:
1237 /* Conversions. */
1238 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1239 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1240 ? "__gnu_f2h_ieee"
1241 : "__gnu_f2h_alternative"));
1242 set_conv_libfunc (sext_optab, SFmode, HFmode,
1243 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1244 ? "__gnu_h2f_ieee"
1245 : "__gnu_h2f_alternative"));
1247 /* Arithmetic. */
1248 set_optab_libfunc (add_optab, HFmode, NULL);
1249 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1250 set_optab_libfunc (smul_optab, HFmode, NULL);
1251 set_optab_libfunc (neg_optab, HFmode, NULL);
1252 set_optab_libfunc (sub_optab, HFmode, NULL);
1254 /* Comparisons. */
1255 set_optab_libfunc (eq_optab, HFmode, NULL);
1256 set_optab_libfunc (ne_optab, HFmode, NULL);
1257 set_optab_libfunc (lt_optab, HFmode, NULL);
1258 set_optab_libfunc (le_optab, HFmode, NULL);
1259 set_optab_libfunc (ge_optab, HFmode, NULL);
1260 set_optab_libfunc (gt_optab, HFmode, NULL);
1261 set_optab_libfunc (unord_optab, HFmode, NULL);
1262 break;
1264 default:
1265 break;
1268 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1270 const arm_fixed_mode_set fixed_arith_modes[] =
1272 { QQmode, "qq" },
1273 { UQQmode, "uqq" },
1274 { HQmode, "hq" },
1275 { UHQmode, "uhq" },
1276 { SQmode, "sq" },
1277 { USQmode, "usq" },
1278 { DQmode, "dq" },
1279 { UDQmode, "udq" },
1280 { TQmode, "tq" },
1281 { UTQmode, "utq" },
1282 { HAmode, "ha" },
1283 { UHAmode, "uha" },
1284 { SAmode, "sa" },
1285 { USAmode, "usa" },
1286 { DAmode, "da" },
1287 { UDAmode, "uda" },
1288 { TAmode, "ta" },
1289 { UTAmode, "uta" }
1291 const arm_fixed_mode_set fixed_conv_modes[] =
1293 { QQmode, "qq" },
1294 { UQQmode, "uqq" },
1295 { HQmode, "hq" },
1296 { UHQmode, "uhq" },
1297 { SQmode, "sq" },
1298 { USQmode, "usq" },
1299 { DQmode, "dq" },
1300 { UDQmode, "udq" },
1301 { TQmode, "tq" },
1302 { UTQmode, "utq" },
1303 { HAmode, "ha" },
1304 { UHAmode, "uha" },
1305 { SAmode, "sa" },
1306 { USAmode, "usa" },
1307 { DAmode, "da" },
1308 { UDAmode, "uda" },
1309 { TAmode, "ta" },
1310 { UTAmode, "uta" },
1311 { QImode, "qi" },
1312 { HImode, "hi" },
1313 { SImode, "si" },
1314 { DImode, "di" },
1315 { TImode, "ti" },
1316 { SFmode, "sf" },
1317 { DFmode, "df" }
1319 unsigned int i, j;
1321 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1323 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1324 "add", fixed_arith_modes[i].name, 3);
1325 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1326 "ssadd", fixed_arith_modes[i].name, 3);
1327 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1328 "usadd", fixed_arith_modes[i].name, 3);
1329 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1330 "sub", fixed_arith_modes[i].name, 3);
1331 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1332 "sssub", fixed_arith_modes[i].name, 3);
1333 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1334 "ussub", fixed_arith_modes[i].name, 3);
1335 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1336 "mul", fixed_arith_modes[i].name, 3);
1337 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1338 "ssmul", fixed_arith_modes[i].name, 3);
1339 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1340 "usmul", fixed_arith_modes[i].name, 3);
1341 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1342 "div", fixed_arith_modes[i].name, 3);
1343 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1344 "udiv", fixed_arith_modes[i].name, 3);
1345 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1346 "ssdiv", fixed_arith_modes[i].name, 3);
1347 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1348 "usdiv", fixed_arith_modes[i].name, 3);
1349 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1350 "neg", fixed_arith_modes[i].name, 2);
1351 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1352 "ssneg", fixed_arith_modes[i].name, 2);
1353 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1354 "usneg", fixed_arith_modes[i].name, 2);
1355 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1356 "ashl", fixed_arith_modes[i].name, 3);
1357 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1358 "ashr", fixed_arith_modes[i].name, 3);
1359 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1360 "lshr", fixed_arith_modes[i].name, 3);
1361 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1362 "ssashl", fixed_arith_modes[i].name, 3);
1363 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1364 "usashl", fixed_arith_modes[i].name, 3);
1365 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1366 "cmp", fixed_arith_modes[i].name, 2);
1369 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1370 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1372 if (i == j
1373 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1374 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1375 continue;
1377 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1378 fixed_conv_modes[j].mode, "fract",
1379 fixed_conv_modes[i].name,
1380 fixed_conv_modes[j].name);
1381 arm_set_fixed_conv_libfunc (satfract_optab,
1382 fixed_conv_modes[i].mode,
1383 fixed_conv_modes[j].mode, "satfract",
1384 fixed_conv_modes[i].name,
1385 fixed_conv_modes[j].name);
1386 arm_set_fixed_conv_libfunc (fractuns_optab,
1387 fixed_conv_modes[i].mode,
1388 fixed_conv_modes[j].mode, "fractuns",
1389 fixed_conv_modes[i].name,
1390 fixed_conv_modes[j].name);
1391 arm_set_fixed_conv_libfunc (satfractuns_optab,
1392 fixed_conv_modes[i].mode,
1393 fixed_conv_modes[j].mode, "satfractuns",
1394 fixed_conv_modes[i].name,
1395 fixed_conv_modes[j].name);
1399 if (TARGET_AAPCS_BASED)
1400 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1403 /* On AAPCS systems, this is the "struct __va_list". */
1404 static GTY(()) tree va_list_type;
1406 /* Return the type to use as __builtin_va_list. */
1407 static tree
1408 arm_build_builtin_va_list (void)
1410 tree va_list_name;
1411 tree ap_field;
1413 if (!TARGET_AAPCS_BASED)
1414 return std_build_builtin_va_list ();
1416 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1417 defined as:
1419 struct __va_list
1421 void *__ap;
1424 The C Library ABI further reinforces this definition in \S
1425 4.1.
1427 We must follow this definition exactly. The structure tag
1428 name is visible in C++ mangled names, and thus forms a part
1429 of the ABI. The field name may be used by people who
1430 #include <stdarg.h>. */
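/* Editorial note (assumption, not from the original file): because the tag
   is ABI-visible, a C++ declaration such as

     void f (va_list);

   mangles on AAPCS targets as _Z1fSt9__va_list, i.e. the struct is treated
   as if it were declared in namespace std, so renaming the tag or the __ap
   field would silently break link compatibility.  */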
1431 /* Create the type. */
1432 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1433 /* Give it the required name. */
1434 va_list_name = build_decl (BUILTINS_LOCATION,
1435 TYPE_DECL,
1436 get_identifier ("__va_list"),
1437 va_list_type);
1438 DECL_ARTIFICIAL (va_list_name) = 1;
1439 TYPE_NAME (va_list_type) = va_list_name;
1440 TYPE_STUB_DECL (va_list_type) = va_list_name;
1441 /* Create the __ap field. */
1442 ap_field = build_decl (BUILTINS_LOCATION,
1443 FIELD_DECL,
1444 get_identifier ("__ap"),
1445 ptr_type_node);
1446 DECL_ARTIFICIAL (ap_field) = 1;
1447 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1448 TYPE_FIELDS (va_list_type) = ap_field;
1449 /* Compute its layout. */
1450 layout_type (va_list_type);
1452 return va_list_type;
1455 /* Return an expression of type "void *" pointing to the next
1456 available argument in a variable-argument list. VALIST is the
1457 user-level va_list object, of type __builtin_va_list. */
1458 static tree
1459 arm_extract_valist_ptr (tree valist)
1461 if (TREE_TYPE (valist) == error_mark_node)
1462 return error_mark_node;
1464 /* On an AAPCS target, the pointer is stored within "struct
1465 va_list". */
1466 if (TARGET_AAPCS_BASED)
1468 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1469 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1470 valist, ap_field, NULL_TREE);
1473 return valist;
1476 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1477 static void
1478 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1480 valist = arm_extract_valist_ptr (valist);
1481 std_expand_builtin_va_start (valist, nextarg);
1484 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1485 static tree
1486 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1487 gimple_seq *post_p)
1489 valist = arm_extract_valist_ptr (valist);
1490 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1493 /* Fix up any incompatible options that the user has specified. */
1494 static void
1495 arm_option_override (void)
1497 if (global_options_set.x_arm_arch_option)
1498 arm_selected_arch = &all_architectures[arm_arch_option];
1500 if (global_options_set.x_arm_cpu_option)
1501 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1503 if (global_options_set.x_arm_tune_option)
1504 arm_selected_tune = &all_cores[(int) arm_tune_option];
1506 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1507 SUBTARGET_OVERRIDE_OPTIONS;
1508 #endif
1510 if (arm_selected_arch)
1512 if (arm_selected_cpu)
1514 /* Check for conflict between mcpu and march. */
1515 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1517 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1518 arm_selected_cpu->name, arm_selected_arch->name);
1519 /* -march wins for code generation.
1520 -mcpu wins for default tuning. */
1521 if (!arm_selected_tune)
1522 arm_selected_tune = arm_selected_cpu;
1524 arm_selected_cpu = arm_selected_arch;
1526 else
1527 /* -mcpu wins. */
1528 arm_selected_arch = NULL;
1530 else
1531 /* Pick a CPU based on the architecture. */
1532 arm_selected_cpu = arm_selected_arch;
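/* Worked example (editorial addition): with "-mcpu=arm926ej-s -march=armv7-a"
   the two selections disagree on more than tuning bits, so the warning above
   fires; code is then generated for the armv7-a architecture while
   arm926ej-s is kept as the default tuning target.  With -march alone, a
   matching CPU is simply picked from the architecture entry.  */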
1535 /* If the user did not specify a processor, choose one for them. */
1536 if (!arm_selected_cpu)
1538 const struct processors * sel;
1539 unsigned int sought;
1541 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1542 if (!arm_selected_cpu->name)
1544 #ifdef SUBTARGET_CPU_DEFAULT
1545 /* Use the subtarget default CPU if none was specified by
1546 configure. */
1547 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1548 #endif
1549 /* Default to ARM6. */
1550 if (!arm_selected_cpu->name)
1551 arm_selected_cpu = &all_cores[arm6];
1554 sel = arm_selected_cpu;
1555 insn_flags = sel->flags;
1557 /* Now check to see if the user has specified some command line
1558 switch that requires certain abilities from the cpu. */
1559 sought = 0;
1561 if (TARGET_INTERWORK || TARGET_THUMB)
1563 sought |= (FL_THUMB | FL_MODE32);
1565 /* There are no ARM processors that support both APCS-26 and
1566 interworking. Therefore we force FL_MODE26 to be removed
1567 from insn_flags here (if it was set), so that the search
1568 below will always be able to find a compatible processor. */
1569 insn_flags &= ~FL_MODE26;
1572 if (sought != 0 && ((sought & insn_flags) != sought))
1574 /* Try to locate a CPU type that supports all of the abilities
1575 of the default CPU, plus the extra abilities requested by
1576 the user. */
1577 for (sel = all_cores; sel->name != NULL; sel++)
1578 if ((sel->flags & sought) == (sought | insn_flags))
1579 break;
1581 if (sel->name == NULL)
1583 unsigned current_bit_count = 0;
1584 const struct processors * best_fit = NULL;
1586 /* Ideally we would like to issue an error message here
1587 saying that it was not possible to find a CPU compatible
1588 with the default CPU, but which also supports the command
1589 line options specified by the programmer, and so they
1590 ought to use the -mcpu=<name> command line option to
1591 override the default CPU type.
1593 If we cannot find a cpu that has both the
1594 characteristics of the default cpu and the given
1595 command line options we scan the array again looking
1596 for a best match. */
1597 for (sel = all_cores; sel->name != NULL; sel++)
1598 if ((sel->flags & sought) == sought)
1600 unsigned count;
1602 count = bit_count (sel->flags & insn_flags);
1604 if (count >= current_bit_count)
1606 best_fit = sel;
1607 current_bit_count = count;
1611 gcc_assert (best_fit);
1612 sel = best_fit;
1615 arm_selected_cpu = sel;
1619 gcc_assert (arm_selected_cpu);
1620 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1621 if (!arm_selected_tune)
1622 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1624 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1625 insn_flags = arm_selected_cpu->flags;
1626 arm_base_arch = arm_selected_cpu->base_arch;
1628 arm_tune = arm_selected_tune->core;
1629 tune_flags = arm_selected_tune->flags;
1630 current_tune = arm_selected_tune->tune;
1632 /* Make sure that the processor choice does not conflict with any of the
1633 other command line choices. */
1634 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1635 error ("target CPU does not support ARM mode");
1637 /* BPABI targets use linker tricks to allow interworking on cores
1638 without thumb support. */
1639 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1641 warning (0, "target CPU does not support interworking");
1642 target_flags &= ~MASK_INTERWORK;
1645 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1647 warning (0, "target CPU does not support THUMB instructions");
1648 target_flags &= ~MASK_THUMB;
1651 if (TARGET_APCS_FRAME && TARGET_THUMB)
1653 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1654 target_flags &= ~MASK_APCS_FRAME;
1657 /* Callee super interworking implies thumb interworking. Adding
1658 this to the flags here simplifies the logic elsewhere. */
1659 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1660 target_flags |= MASK_INTERWORK;
1662 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1663 from here where no function is being compiled currently. */
1664 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1665 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1667 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1668 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1670 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1672 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1673 target_flags |= MASK_APCS_FRAME;
1676 if (TARGET_POKE_FUNCTION_NAME)
1677 target_flags |= MASK_APCS_FRAME;
1679 if (TARGET_APCS_REENT && flag_pic)
1680 error ("-fpic and -mapcs-reent are incompatible");
1682 if (TARGET_APCS_REENT)
1683 warning (0, "APCS reentrant code not supported. Ignored");
1685 /* If this target is normally configured to use APCS frames, warn if they
1686 are turned off and debugging is turned on. */
1687 if (TARGET_ARM
1688 && write_symbols != NO_DEBUG
1689 && !TARGET_APCS_FRAME
1690 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1691 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1693 if (TARGET_APCS_FLOAT)
1694 warning (0, "passing floating point arguments in fp regs not yet supported");
1696 if (TARGET_LITTLE_WORDS)
1697 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1698 "will be removed in a future release");
1700 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1701 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1702 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1703 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1704 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1705 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1706 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1707 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1708 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1709 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1710 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1711 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1712 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1714 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1715 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1716 thumb_code = TARGET_ARM == 0;
1717 thumb1_code = TARGET_THUMB1 != 0;
1718 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1719 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1720 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1721 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1722 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1723 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1724 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1726 /* If we are not using the default (ARM mode) section anchor offset
1727 ranges, then set the correct ranges now. */
1728 if (TARGET_THUMB1)
1730 /* Thumb-1 LDR instructions cannot have negative offsets.
1731 Permissible positive offset ranges are 5-bit (for byte loads),
1732 6-bit (for halfword loads), or 7-bit (for word loads).
1733 Empirical results suggest a 7-bit anchor range gives the best
1734 overall code size. */
1735 targetm.min_anchor_offset = 0;
1736 targetm.max_anchor_offset = 127;
1738 else if (TARGET_THUMB2)
1740 /* The minimum is set such that the total size of the block
1741 for a particular anchor is 248 + 1 + 4095 bytes, which is
1742 divisible by eight, ensuring natural spacing of anchors. */
1743 targetm.min_anchor_offset = -248;
1744 targetm.max_anchor_offset = 4095;
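/* Arithmetic check for the comment above: 248 + 1 + 4095 = 4344 = 8 * 543.  */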
1747 /* V5 code we generate is completely interworking capable, so we turn off
1748 TARGET_INTERWORK here to avoid many tests later on. */
1750 /* XXX However, we must pass the right pre-processor defines to CPP
1751 or GLD can get confused. This is a hack. */
1752 if (TARGET_INTERWORK)
1753 arm_cpp_interwork = 1;
1755 if (arm_arch5)
1756 target_flags &= ~MASK_INTERWORK;
1758 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1759 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1761 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1762 error ("iwmmxt abi requires an iwmmxt capable cpu");
1764 if (!global_options_set.x_arm_fpu_index)
1766 const char *target_fpu_name;
1767 bool ok;
1769 #ifdef FPUTYPE_DEFAULT
1770 target_fpu_name = FPUTYPE_DEFAULT;
1771 #else
1772 target_fpu_name = "vfp";
1773 #endif
1775 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1776 CL_TARGET);
1777 gcc_assert (ok);
1780 arm_fpu_desc = &all_fpus[arm_fpu_index];
1782 switch (arm_fpu_desc->model)
1784 case ARM_FP_MODEL_VFP:
1785 arm_fpu_attr = FPU_VFP;
1786 break;
1788 default:
1789 gcc_unreachable();
1792 if (TARGET_AAPCS_BASED)
1794 if (TARGET_CALLER_INTERWORKING)
1795 error ("AAPCS does not support -mcaller-super-interworking");
1796 else
1797 if (TARGET_CALLEE_INTERWORKING)
1798 error ("AAPCS does not support -mcallee-super-interworking");
1801 /* iWMMXt and NEON are incompatible. */
1802 if (TARGET_IWMMXT && TARGET_NEON)
1803 error ("iWMMXt and NEON are incompatible");
1805 /* iWMMXt unsupported under Thumb mode. */
1806 if (TARGET_THUMB && TARGET_IWMMXT)
1807 error ("iWMMXt unsupported under Thumb mode");
1809 /* __fp16 support currently assumes the core has ldrh. */
1810 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1811 sorry ("__fp16 and no ldrh");
1813 /* If soft-float is specified then don't use FPU. */
1814 if (TARGET_SOFT_FLOAT)
1815 arm_fpu_attr = FPU_NONE;
1817 if (TARGET_AAPCS_BASED)
1819 if (arm_abi == ARM_ABI_IWMMXT)
1820 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1821 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1822 && TARGET_HARD_FLOAT
1823 && TARGET_VFP)
1824 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1825 else
1826 arm_pcs_default = ARM_PCS_AAPCS;
1828 else
1830 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1831 sorry ("-mfloat-abi=hard and VFP");
1833 if (arm_abi == ARM_ABI_APCS)
1834 arm_pcs_default = ARM_PCS_APCS;
1835 else
1836 arm_pcs_default = ARM_PCS_ATPCS;
1839 /* For arm2/3 there is no need to do any scheduling if we are doing
1840 software floating-point. */
1841 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1842 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1844 /* Use the cp15 method if it is available. */
1845 if (target_thread_pointer == TP_AUTO)
1847 if (arm_arch6k && !TARGET_THUMB1)
1848 target_thread_pointer = TP_CP15;
1849 else
1850 target_thread_pointer = TP_SOFT;
1853 if (TARGET_HARD_TP && TARGET_THUMB1)
1854 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1856 /* Override the default structure alignment for AAPCS ABI. */
1857 if (!global_options_set.x_arm_structure_size_boundary)
1859 if (TARGET_AAPCS_BASED)
1860 arm_structure_size_boundary = 8;
1862 else
1864 if (arm_structure_size_boundary != 8
1865 && arm_structure_size_boundary != 32
1866 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1868 if (ARM_DOUBLEWORD_ALIGN)
1869 warning (0,
1870 "structure size boundary can only be set to 8, 32 or 64");
1871 else
1872 warning (0, "structure size boundary can only be set to 8 or 32");
1873 arm_structure_size_boundary
1874 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1878 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1880 error ("RTP PIC is incompatible with Thumb");
1881 flag_pic = 0;
1884 /* If stack checking is disabled, we can use r10 as the PIC register,
1885 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1886 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1888 if (TARGET_VXWORKS_RTP)
1889 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1890 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1893 if (flag_pic && TARGET_VXWORKS_RTP)
1894 arm_pic_register = 9;
1896 if (arm_pic_register_string != NULL)
1898 int pic_register = decode_reg_name (arm_pic_register_string);
1900 if (!flag_pic)
1901 warning (0, "-mpic-register= is useless without -fpic");
1903 /* Prevent the user from choosing an obviously stupid PIC register. */
1904 else if (pic_register < 0 || call_used_regs[pic_register]
1905 || pic_register == HARD_FRAME_POINTER_REGNUM
1906 || pic_register == STACK_POINTER_REGNUM
1907 || pic_register >= PC_REGNUM
1908 || (TARGET_VXWORKS_RTP
1909 && (unsigned int) pic_register != arm_pic_register))
1910 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1911 else
1912 arm_pic_register = pic_register;
1915 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1916 if (fix_cm3_ldrd == 2)
1918 if (arm_selected_cpu->core == cortexm3)
1919 fix_cm3_ldrd = 1;
1920 else
1921 fix_cm3_ldrd = 0;
1924 /* Enable -munaligned-access by default for
1925 - all ARMv6 architecture-based processors
1926 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1928 Disable -munaligned-access by default for
1929 - all pre-ARMv6 architecture-based processors
1930 - ARMv6-M architecture-based processors. */
1932 if (unaligned_access == 2)
1934 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1935 unaligned_access = 1;
1936 else
1937 unaligned_access = 0;
1939 else if (unaligned_access == 1
1940 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1942 warning (0, "target CPU does not support unaligned accesses");
1943 unaligned_access = 0;
1946 if (TARGET_THUMB1 && flag_schedule_insns)
1948 /* Don't warn since it's on by default in -O2. */
1949 flag_schedule_insns = 0;
1952 if (optimize_size)
1954 /* If optimizing for size, bump the number of instructions that we
1955 are prepared to conditionally execute (even on a StrongARM). */
1956 max_insns_skipped = 6;
1958 else
1959 max_insns_skipped = current_tune->max_insns_skipped;
1961 /* Hot/Cold partitioning is not currently supported, since we can't
1962 handle literal pool placement in that case. */
1963 if (flag_reorder_blocks_and_partition)
1965 inform (input_location,
1966 "-freorder-blocks-and-partition not supported on this architecture");
1967 flag_reorder_blocks_and_partition = 0;
1968 flag_reorder_blocks = 1;
1971 if (flag_pic)
1972 /* Hoisting PIC address calculations more aggressively provides a small,
1973 but measurable, size reduction for PIC code. Therefore, we decrease
1974 the bar for unrestricted expression hoisting to the cost of PIC address
1975 calculation, which is 2 instructions. */
1976 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1977 global_options.x_param_values,
1978 global_options_set.x_param_values);
1980 /* ARM EABI defaults to strict volatile bitfields. */
1981 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
1982 && abi_version_at_least(2))
1983 flag_strict_volatile_bitfields = 1;
1985 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
1986 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1987 if (flag_prefetch_loop_arrays < 0
1988 && HAVE_prefetch
1989 && optimize >= 3
1990 && current_tune->num_prefetch_slots > 0)
1991 flag_prefetch_loop_arrays = 1;
1993 /* Set up parameters to be used in prefetching algorithm. Do not override the
1994 defaults unless we are tuning for a core we have researched values for. */
1995 if (current_tune->num_prefetch_slots > 0)
1996 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1997 current_tune->num_prefetch_slots,
1998 global_options.x_param_values,
1999 global_options_set.x_param_values);
2000 if (current_tune->l1_cache_line_size >= 0)
2001 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2002 current_tune->l1_cache_line_size,
2003 global_options.x_param_values,
2004 global_options_set.x_param_values);
2005 if (current_tune->l1_cache_size >= 0)
2006 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2007 current_tune->l1_cache_size,
2008 global_options.x_param_values,
2009 global_options_set.x_param_values);
2011 /* Register global variables with the garbage collector. */
2012 arm_add_gc_roots ();
2015 static void
2016 arm_add_gc_roots (void)
2018 gcc_obstack_init(&minipool_obstack);
2019 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2022 /* A table of known ARM exception types.
2023 For use with the interrupt function attribute. */
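/* For example, a handler might be declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   and the string argument is matched against this table by
   arm_isr_value below.  */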
2025 typedef struct
2027 const char *const arg;
2028 const unsigned long return_value;
2030 isr_attribute_arg;
2032 static const isr_attribute_arg isr_attribute_args [] =
2034 { "IRQ", ARM_FT_ISR },
2035 { "irq", ARM_FT_ISR },
2036 { "FIQ", ARM_FT_FIQ },
2037 { "fiq", ARM_FT_FIQ },
2038 { "ABORT", ARM_FT_ISR },
2039 { "abort", ARM_FT_ISR },
2040 { "ABORT", ARM_FT_ISR },
2041 { "abort", ARM_FT_ISR },
2042 { "UNDEF", ARM_FT_EXCEPTION },
2043 { "undef", ARM_FT_EXCEPTION },
2044 { "SWI", ARM_FT_EXCEPTION },
2045 { "swi", ARM_FT_EXCEPTION },
2046 { NULL, ARM_FT_NORMAL }
2049 /* Returns the (interrupt) function type of the current
2050 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2052 static unsigned long
2053 arm_isr_value (tree argument)
2055 const isr_attribute_arg * ptr;
2056 const char * arg;
2058 if (!arm_arch_notm)
2059 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2061 /* No argument - default to IRQ. */
2062 if (argument == NULL_TREE)
2063 return ARM_FT_ISR;
2065 /* Get the value of the argument. */
2066 if (TREE_VALUE (argument) == NULL_TREE
2067 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2068 return ARM_FT_UNKNOWN;
2070 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2072 /* Check it against the list of known arguments. */
2073 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2074 if (streq (arg, ptr->arg))
2075 return ptr->return_value;
2077 /* An unrecognized interrupt type. */
2078 return ARM_FT_UNKNOWN;
2081 /* Computes the type of the current function. */
2083 static unsigned long
2084 arm_compute_func_type (void)
2086 unsigned long type = ARM_FT_UNKNOWN;
2087 tree a;
2088 tree attr;
2090 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2092 /* Decide if the current function is volatile. Such functions
2093 never return, and many memory cycles can be saved by not storing
2094 register values that will never be needed again. This optimization
2095 was added to speed up context switching in a kernel application. */
2096 if (optimize > 0
2097 && (TREE_NOTHROW (current_function_decl)
2098 || !(flag_unwind_tables
2099 || (flag_exceptions
2100 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2101 && TREE_THIS_VOLATILE (current_function_decl))
2102 type |= ARM_FT_VOLATILE;
2104 if (cfun->static_chain_decl != NULL)
2105 type |= ARM_FT_NESTED;
2107 attr = DECL_ATTRIBUTES (current_function_decl);
2109 a = lookup_attribute ("naked", attr);
2110 if (a != NULL_TREE)
2111 type |= ARM_FT_NAKED;
2113 a = lookup_attribute ("isr", attr);
2114 if (a == NULL_TREE)
2115 a = lookup_attribute ("interrupt", attr);
2117 if (a == NULL_TREE)
2118 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2119 else
2120 type |= arm_isr_value (TREE_VALUE (a));
2122 return type;
2125 /* Returns the type of the current function. */
2127 unsigned long
2128 arm_current_func_type (void)
2130 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2131 cfun->machine->func_type = arm_compute_func_type ();
2133 return cfun->machine->func_type;
2136 bool
2137 arm_allocate_stack_slots_for_args (void)
2139 /* Naked functions should not allocate stack slots for arguments. */
2140 return !IS_NAKED (arm_current_func_type ());
2144 /* Output assembler code for a block containing the constant parts
2145 of a trampoline, leaving space for the variable parts.
2147 On the ARM, (if r8 is the static chain regnum, and remembering that
2148 referencing pc adds an offset of 8) the trampoline looks like:
2149 ldr r8, [pc, #0]
2150 ldr pc, [pc]
2151 .word static chain value
2152 .word function's address
2153 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2155 static void
2156 arm_asm_trampoline_template (FILE *f)
2158 if (TARGET_ARM)
2160 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2161 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2163 else if (TARGET_THUMB2)
2165 /* The Thumb-2 trampoline is similar to the arm implementation.
2166 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2167 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2168 STATIC_CHAIN_REGNUM, PC_REGNUM);
2169 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2171 else
2173 ASM_OUTPUT_ALIGN (f, 2);
2174 fprintf (f, "\t.code\t16\n");
2175 fprintf (f, ".Ltrampoline_start:\n");
2176 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2177 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2178 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2179 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2180 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2181 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2183 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2184 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2187 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2189 static void
2190 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2192 rtx fnaddr, mem, a_tramp;
2194 emit_block_move (m_tramp, assemble_trampoline_template (),
2195 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2197 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2198 emit_move_insn (mem, chain_value);
2200 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2201 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2202 emit_move_insn (mem, fnaddr);
2204 a_tramp = XEXP (m_tramp, 0);
2205 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2206 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2207 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2210 /* Thumb trampolines should be entered in thumb mode, so set
2211 the bottom bit of the address. */
2213 static rtx
2214 arm_trampoline_adjust_address (rtx addr)
2216 if (TARGET_THUMB)
2217 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2218 NULL, 0, OPTAB_LIB_WIDEN);
2219 return addr;
2222 /* Return 1 if it is possible to return using a single instruction.
2223 If SIBLING is non-null, this is a test for a return before a sibling
2224 call. SIBLING is the call insn, so we can examine its register usage. */
2227 use_return_insn (int iscond, rtx sibling)
2229 int regno;
2230 unsigned int func_type;
2231 unsigned long saved_int_regs;
2232 unsigned HOST_WIDE_INT stack_adjust;
2233 arm_stack_offsets *offsets;
2235 /* Never use a return instruction before reload has run. */
2236 if (!reload_completed)
2237 return 0;
2239 func_type = arm_current_func_type ();
2241 /* Naked, volatile and stack alignment functions need special
2242 consideration. */
2243 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2244 return 0;
2246 /* So do interrupt functions that use the frame pointer and Thumb
2247 interrupt functions. */
2248 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2249 return 0;
2251 offsets = arm_get_frame_offsets ();
2252 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2254 /* As do variadic functions. */
2255 if (crtl->args.pretend_args_size
2256 || cfun->machine->uses_anonymous_args
2257 /* Or if the function calls __builtin_eh_return () */
2258 || crtl->calls_eh_return
2259 /* Or if the function calls alloca */
2260 || cfun->calls_alloca
2261 /* Or if there is a stack adjustment. However, if the stack pointer
2262 is saved on the stack, we can use a pre-incrementing stack load. */
2263 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2264 && stack_adjust == 4)))
2265 return 0;
2267 saved_int_regs = offsets->saved_regs_mask;
2269 /* Unfortunately, the insn
2271 ldmib sp, {..., sp, ...}
2273 triggers a bug on most SA-110 based devices, such that the stack
2274 pointer won't be correctly restored if the instruction takes a
2275 page fault. We work around this problem by popping r3 along with
2276 the other registers, since that is never slower than executing
2277 another instruction.
2279 We test for !arm_arch5 here, because code for any architecture
2280 less than this could potentially be run on one of the buggy
2281 chips. */
2282 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2284 /* Validate that r3 is a call-clobbered register (always true in
2285 the default abi) ... */
2286 if (!call_used_regs[3])
2287 return 0;
2289 /* ... that it isn't being used for a return value ... */
2290 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2291 return 0;
2293 /* ... or for a tail-call argument ... */
2294 if (sibling)
2296 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2298 if (find_regno_fusage (sibling, USE, 3))
2299 return 0;
2302 /* ... and that there are no call-saved registers in r0-r2
2303 (always true in the default ABI). */
2304 if (saved_int_regs & 0x7)
2305 return 0;
2308 /* Can't be done if interworking with Thumb, and any registers have been
2309 stacked. */
2310 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2311 return 0;
2313 /* On StrongARM, conditional returns are expensive if they aren't
2314 taken and multiple registers have been stacked. */
2315 if (iscond && arm_tune_strongarm)
2317 /* Conditional return when just the LR is stored is a simple
2318 conditional-load instruction, that's not expensive. */
2319 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2320 return 0;
2322 if (flag_pic
2323 && arm_pic_register != INVALID_REGNUM
2324 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2325 return 0;
2328 /* If there are saved registers but the LR isn't saved, then we need
2329 two instructions for the return. */
2330 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2331 return 0;
2333 /* Can't be done if any of the VFP regs are pushed,
2334 since this also requires an insn. */
2335 if (TARGET_HARD_FLOAT && TARGET_VFP)
2336 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2337 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2338 return 0;
2340 if (TARGET_REALLY_IWMMXT)
2341 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2342 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2343 return 0;
2345 return 1;
2348 /* Return TRUE if int I is a valid immediate ARM constant. */
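/* Roughly: in ARM state a valid immediate is an 8-bit value rotated
   right by an even amount, so e.g. 0xff, 0x3fc (0xff << 2) and
   0xff000000 are accepted while 0x101 and 0xffff are not; in Thumb-2
   state the replicated byte/halfword forms such as 0x00ab00ab and
   0xabababab are also allowed, as handled below.  */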
2351 const_ok_for_arm (HOST_WIDE_INT i)
2353 int lowbit;
2355 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2356 be all zero, or all one. */
2357 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2358 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2359 != ((~(unsigned HOST_WIDE_INT) 0)
2360 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2361 return FALSE;
2363 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2365 /* Fast return for 0 and small values. We must do this for zero, since
2366 the code below can't handle that one case. */
2367 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2368 return TRUE;
2370 /* Get the number of trailing zeros. */
2371 lowbit = ffs((int) i) - 1;
2373 /* Only even shifts are allowed in ARM mode so round down to the
2374 nearest even number. */
2375 if (TARGET_ARM)
2376 lowbit &= ~1;
2378 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2379 return TRUE;
2381 if (TARGET_ARM)
2383 /* Allow rotated constants in ARM mode. */
2384 if (lowbit <= 4
2385 && ((i & ~0xc000003f) == 0
2386 || (i & ~0xf000000f) == 0
2387 || (i & ~0xfc000003) == 0))
2388 return TRUE;
2390 else
2392 HOST_WIDE_INT v;
2394 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2395 v = i & 0xff;
2396 v |= v << 16;
2397 if (i == v || i == (v | (v << 8)))
2398 return TRUE;
2400 /* Allow repeated pattern 0xXY00XY00. */
2401 v = i & 0xff00;
2402 v |= v << 16;
2403 if (i == v)
2404 return TRUE;
2407 return FALSE;
2410 /* Return true if I is a valid constant for the operation CODE. */
2412 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2414 if (const_ok_for_arm (i))
2415 return 1;
2417 switch (code)
2419 case SET:
2420 /* See if we can use movw. */
2421 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2422 return 1;
2423 else
2424 /* Otherwise, try mvn. */
2425 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2427 case PLUS:
2428 /* See if we can use addw or subw. */
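/* addw/subw take a 12-bit unsigned immediate, so e.g. i == 0xfff
   (or i == -0xfff) is accepted here even though it is not an 8-bit
   rotated constant.  */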
2429 if (TARGET_THUMB2
2430 && ((i & 0xfffff000) == 0
2431 || ((-i) & 0xfffff000) == 0))
2432 return 1;
2433 /* else fall through. */
2435 case COMPARE:
2436 case EQ:
2437 case NE:
2438 case GT:
2439 case LE:
2440 case LT:
2441 case GE:
2442 case GEU:
2443 case LTU:
2444 case GTU:
2445 case LEU:
2446 case UNORDERED:
2447 case ORDERED:
2448 case UNEQ:
2449 case UNGE:
2450 case UNLT:
2451 case UNGT:
2452 case UNLE:
2453 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2455 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2456 case XOR:
2457 return 0;
2459 case IOR:
2460 if (TARGET_THUMB2)
2461 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2462 return 0;
2464 case AND:
2465 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2467 default:
2468 gcc_unreachable ();
2472 /* Return true if I is a valid DImode constant for the operation CODE. */
2474 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2476 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2477 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2478 rtx hi = GEN_INT (hi_val);
2479 rtx lo = GEN_INT (lo_val);
2481 if (TARGET_THUMB1)
2482 return 0;
2484 switch (code)
2486 case PLUS:
2487 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2489 default:
2490 return 0;
2494 /* Emit a sequence of insns to handle a large constant.
2495 CODE is the code of the operation required, it can be any of SET, PLUS,
2496 IOR, AND, XOR, MINUS;
2497 MODE is the mode in which the operation is being performed;
2498 VAL is the integer to operate on;
2499 SOURCE is the other operand (a register, or a null-pointer for SET);
2500 SUBTARGETS means it is safe to create scratch registers if that will
2501 either produce a simpler sequence, or we will want to cse the values.
2502 Return value is the number of insns emitted. */
2504 /* ??? Tweak this for thumb2. */
2506 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2507 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2509 rtx cond;
2511 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2512 cond = COND_EXEC_TEST (PATTERN (insn));
2513 else
2514 cond = NULL_RTX;
2516 if (subtargets || code == SET
2517 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2518 && REGNO (target) != REGNO (source)))
2520 /* After arm_reorg has been called, we can't fix up expensive
2521 constants by pushing them into memory so we must synthesize
2522 them in-line, regardless of the cost. This is only likely to
2523 be more costly on chips that have load delay slots and we are
2524 compiling without running the scheduler (so no splitting
2525 occurred before the final instruction emission).
2527 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2529 if (!after_arm_reorg
2530 && !cond
2531 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2532 1, 0)
2533 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2534 + (code != SET))))
2536 if (code == SET)
2538 /* Currently SET is the only monadic value for CODE, all
2539 the rest are dyadic. */
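/* With MOVT available this becomes a movw/movt pair, e.g. loading
   0x12345678 as "movw target, #0x5678" followed by "movt target,
   #0x1234"; otherwise a plain SET of the constant is emitted.  */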
2540 if (TARGET_USE_MOVT)
2541 arm_emit_movpair (target, GEN_INT (val));
2542 else
2543 emit_set_insn (target, GEN_INT (val));
2545 return 1;
2547 else
2549 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2551 if (TARGET_USE_MOVT)
2552 arm_emit_movpair (temp, GEN_INT (val));
2553 else
2554 emit_set_insn (temp, GEN_INT (val));
2556 /* For MINUS, the constant is the minuend (we compute VAL - SOURCE),
2557 since we never have subtraction of a constant. */
2558 if (code == MINUS)
2559 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2560 else
2561 emit_set_insn (target,
2562 gen_rtx_fmt_ee (code, mode, source, temp));
2563 return 2;
2568 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2572 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2573 ARM/THUMB2 immediates and add up to VAL.
2574 The function's return value gives the number of insns required. */
2575 static int
2576 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2577 struct four_ints *return_sequence)
2579 int best_consecutive_zeros = 0;
2580 int i;
2581 int best_start = 0;
2582 int insns1, insns2;
2583 struct four_ints tmp_sequence;
2585 /* If we aren't targeting ARM, the best place to start is always at
2586 the bottom, otherwise look more closely. */
2587 if (TARGET_ARM)
2589 for (i = 0; i < 32; i += 2)
2591 int consecutive_zeros = 0;
2593 if (!(val & (3 << i)))
2595 while ((i < 32) && !(val & (3 << i)))
2597 consecutive_zeros += 2;
2598 i += 2;
2600 if (consecutive_zeros > best_consecutive_zeros)
2602 best_consecutive_zeros = consecutive_zeros;
2603 best_start = i - consecutive_zeros;
2605 i -= 2;
2610 /* So long as it won't require any more insns to do so, it's
2611 desirable to emit a small constant (in bits 0...9) in the last
2612 insn. This way there is more chance that it can be combined with
2613 a later addressing insn to form a pre-indexed load or store
2614 operation. Consider:
2616 *((volatile int *)0xe0000100) = 1;
2617 *((volatile int *)0xe0000110) = 2;
2619 We want this to wind up as:
2621 mov rA, #0xe0000000
2622 mov rB, #1
2623 str rB, [rA, #0x100]
2624 mov rB, #2
2625 str rB, [rA, #0x110]
2627 rather than having to synthesize both large constants from scratch.
2629 Therefore, we calculate how many insns would be required to emit
2630 the constant starting from `best_start', and also starting from
2631 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2632 yield a shorter sequence, we may as well use zero. */
2633 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2634 if (best_start != 0
2635 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2637 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2638 if (insns2 <= insns1)
2640 *return_sequence = tmp_sequence;
2641 insns1 = insns2;
2645 return insns1;
2648 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2649 static int
2650 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2651 struct four_ints *return_sequence, int i)
2653 int remainder = val & 0xffffffff;
2654 int insns = 0;
2656 /* Try and find a way of doing the job in either two or three
2657 instructions.
2659 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2660 location. We start at position I. This may be the MSB, or
2661 optimal_immediate_sequence may have positioned it at the largest block
2662 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2663 wrapping around to the top of the word when we drop off the bottom.
2664 In the worst case this code should produce no more than four insns.
2666 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2667 constants, shifted to any arbitrary location. We should always start
2668 at the MSB. */
2671 int end;
2672 unsigned int b1, b2, b3, b4;
2673 unsigned HOST_WIDE_INT result;
2674 int loc;
2676 gcc_assert (insns < 4);
2678 if (i <= 0)
2679 i += 32;
2681 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2682 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2684 loc = i;
2685 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2686 /* We can use addw/subw for the last 12 bits. */
2687 result = remainder;
2688 else
2690 /* Use an 8-bit shifted/rotated immediate. */
2691 end = i - 8;
2692 if (end < 0)
2693 end += 32;
2694 result = remainder & ((0x0ff << end)
2695 | ((i < end) ? (0xff >> (32 - end))
2696 : 0));
2697 i -= 8;
2700 else
2702 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2703 arbitrary shifts. */
2704 i -= TARGET_ARM ? 2 : 1;
2705 continue;
2708 /* Next, see if we can do a better job with a thumb2 replicated
2709 constant.
2711 We do it this way around to catch the cases like 0x01F001E0 where
2712 two 8-bit immediates would work, but a replicated constant would
2713 make it worse.
2715 TODO: 16-bit constants that don't clear all the bits, but still win.
2716 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2717 if (TARGET_THUMB2)
2719 b1 = (remainder & 0xff000000) >> 24;
2720 b2 = (remainder & 0x00ff0000) >> 16;
2721 b3 = (remainder & 0x0000ff00) >> 8;
2722 b4 = remainder & 0xff;
2724 if (loc > 24)
2726 /* The 8-bit immediate already found clears b1 (and maybe b2),
2727 but must leave b3 and b4 alone. */
2729 /* First try to find a 32-bit replicated constant that clears
2730 almost everything. We can assume that we can't do it in one,
2731 or else we wouldn't be here. */
2732 unsigned int tmp = b1 & b2 & b3 & b4;
2733 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2734 + (tmp << 24);
2735 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2736 + (tmp == b3) + (tmp == b4);
2737 if (tmp
2738 && (matching_bytes >= 3
2739 || (matching_bytes == 2
2740 && const_ok_for_op (remainder & ~tmp2, code))))
2742 /* At least 3 of the bytes match, and the fourth has at
2743 least as many bits set, or two of the bytes match
2744 and it will only require one more insn to finish. */
2745 result = tmp2;
2746 i = tmp != b1 ? 32
2747 : tmp != b2 ? 24
2748 : tmp != b3 ? 16
2749 : 8;
2752 /* Second, try to find a 16-bit replicated constant that can
2753 leave three of the bytes clear. If b2 or b4 is already
2754 zero, then we can. If the 8-bit from above would not
2755 clear b2 anyway, then we still win. */
2756 else if (b1 == b3 && (!b2 || !b4
2757 || (remainder & 0x00ff0000 & ~result)))
2759 result = remainder & 0xff00ff00;
2760 i = 24;
2763 else if (loc > 16)
2765 /* The 8-bit immediate already found clears b2 (and maybe b3)
2766 and we don't get here unless b1 is already clear, but it will
2767 leave b4 unchanged. */
2769 /* If we can clear b2 and b4 at once, then we win, since the
2770 8-bits couldn't possibly reach that far. */
2771 if (b2 == b4)
2773 result = remainder & 0x00ff00ff;
2774 i = 16;
2779 return_sequence->i[insns++] = result;
2780 remainder &= ~result;
2782 if (code == SET || code == MINUS)
2783 code = PLUS;
2785 while (remainder);
2787 return insns;
2790 /* Emit an instruction with the indicated PATTERN. If COND is
2791 non-NULL, conditionalize the execution of the instruction on COND
2792 being true. */
2794 static void
2795 emit_constant_insn (rtx cond, rtx pattern)
2797 if (cond)
2798 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2799 emit_insn (pattern);
2802 /* As above, but extra parameter GENERATE which, if clear, suppresses
2803 RTL generation. */
2805 static int
2806 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2807 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2808 int generate)
2810 int can_invert = 0;
2811 int can_negate = 0;
2812 int final_invert = 0;
2813 int i;
2814 int set_sign_bit_copies = 0;
2815 int clear_sign_bit_copies = 0;
2816 int clear_zero_bit_copies = 0;
2817 int set_zero_bit_copies = 0;
2818 int insns = 0, neg_insns, inv_insns;
2819 unsigned HOST_WIDE_INT temp1, temp2;
2820 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2821 struct four_ints *immediates;
2822 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2824 /* Find out which operations are safe for a given CODE. Also do a quick
2825 check for degenerate cases; these can occur when DImode operations
2826 are split. */
2827 switch (code)
2829 case SET:
2830 can_invert = 1;
2831 break;
2833 case PLUS:
2834 can_negate = 1;
2835 break;
2837 case IOR:
2838 if (remainder == 0xffffffff)
2840 if (generate)
2841 emit_constant_insn (cond,
2842 gen_rtx_SET (VOIDmode, target,
2843 GEN_INT (ARM_SIGN_EXTEND (val))));
2844 return 1;
2847 if (remainder == 0)
2849 if (reload_completed && rtx_equal_p (target, source))
2850 return 0;
2852 if (generate)
2853 emit_constant_insn (cond,
2854 gen_rtx_SET (VOIDmode, target, source));
2855 return 1;
2857 break;
2859 case AND:
2860 if (remainder == 0)
2862 if (generate)
2863 emit_constant_insn (cond,
2864 gen_rtx_SET (VOIDmode, target, const0_rtx));
2865 return 1;
2867 if (remainder == 0xffffffff)
2869 if (reload_completed && rtx_equal_p (target, source))
2870 return 0;
2871 if (generate)
2872 emit_constant_insn (cond,
2873 gen_rtx_SET (VOIDmode, target, source));
2874 return 1;
2876 can_invert = 1;
2877 break;
2879 case XOR:
2880 if (remainder == 0)
2882 if (reload_completed && rtx_equal_p (target, source))
2883 return 0;
2884 if (generate)
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target, source));
2887 return 1;
2890 if (remainder == 0xffffffff)
2892 if (generate)
2893 emit_constant_insn (cond,
2894 gen_rtx_SET (VOIDmode, target,
2895 gen_rtx_NOT (mode, source)));
2896 return 1;
2898 final_invert = 1;
2899 break;
2901 case MINUS:
2902 /* We treat MINUS as (val - source), since (source - val) is always
2903 passed as (source + (-val)). */
2904 if (remainder == 0)
2906 if (generate)
2907 emit_constant_insn (cond,
2908 gen_rtx_SET (VOIDmode, target,
2909 gen_rtx_NEG (mode, source)));
2910 return 1;
2912 if (const_ok_for_arm (val))
2914 if (generate)
2915 emit_constant_insn (cond,
2916 gen_rtx_SET (VOIDmode, target,
2917 gen_rtx_MINUS (mode, GEN_INT (val),
2918 source)));
2919 return 1;
2922 break;
2924 default:
2925 gcc_unreachable ();
2928 /* If we can do it in one insn get out quickly. */
2929 if (const_ok_for_op (val, code))
2931 if (generate)
2932 emit_constant_insn (cond,
2933 gen_rtx_SET (VOIDmode, target,
2934 (source
2935 ? gen_rtx_fmt_ee (code, mode, source,
2936 GEN_INT (val))
2937 : GEN_INT (val))));
2938 return 1;
2941 /* Calculate a few attributes that may be useful for specific
2942 optimizations. */
2943 /* Count number of leading zeros. */
2944 for (i = 31; i >= 0; i--)
2946 if ((remainder & (1 << i)) == 0)
2947 clear_sign_bit_copies++;
2948 else
2949 break;
2952 /* Count number of leading 1's. */
2953 for (i = 31; i >= 0; i--)
2955 if ((remainder & (1 << i)) != 0)
2956 set_sign_bit_copies++;
2957 else
2958 break;
2962 /* Count number of trailing zeros. */
2962 for (i = 0; i <= 31; i++)
2964 if ((remainder & (1 << i)) == 0)
2965 clear_zero_bit_copies++;
2966 else
2967 break;
2970 /* Count number of trailing 1's. */
2971 for (i = 0; i <= 31; i++)
2973 if ((remainder & (1 << i)) != 0)
2974 set_zero_bit_copies++;
2975 else
2976 break;
2979 switch (code)
2981 case SET:
2982 /* See if we can do this by sign_extending a constant that is known
2983 to be negative. This is a good way of doing it, since the shift
2984 may well merge into a subsequent insn. */
2985 if (set_sign_bit_copies > 1)
2987 if (const_ok_for_arm
2988 (temp1 = ARM_SIGN_EXTEND (remainder
2989 << (set_sign_bit_copies - 1))))
2991 if (generate)
2993 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2994 emit_constant_insn (cond,
2995 gen_rtx_SET (VOIDmode, new_src,
2996 GEN_INT (temp1)));
2997 emit_constant_insn (cond,
2998 gen_ashrsi3 (target, new_src,
2999 GEN_INT (set_sign_bit_copies - 1)));
3001 return 2;
3003 /* For an inverted constant, we will need to set the low bits;
3004 these will be shifted out of harm's way. */
3005 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3006 if (const_ok_for_arm (~temp1))
3008 if (generate)
3010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3011 emit_constant_insn (cond,
3012 gen_rtx_SET (VOIDmode, new_src,
3013 GEN_INT (temp1)));
3014 emit_constant_insn (cond,
3015 gen_ashrsi3 (target, new_src,
3016 GEN_INT (set_sign_bit_copies - 1)));
3018 return 2;
3022 /* See if we can calculate the value as the difference between two
3023 valid immediates. */
3024 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3026 int topshift = clear_sign_bit_copies & ~1;
3028 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3029 & (0xff000000 >> topshift));
3031 /* If temp1 is zero, then that means the 9 most significant
3032 bits of remainder were 1 and we've caused it to overflow.
3033 When topshift is 0 we don't need to do anything since we
3034 can borrow from 'bit 32'. */
3035 if (temp1 == 0 && topshift != 0)
3036 temp1 = 0x80000000 >> (topshift - 1);
3038 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3040 if (const_ok_for_arm (temp2))
3042 if (generate)
3044 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3045 emit_constant_insn (cond,
3046 gen_rtx_SET (VOIDmode, new_src,
3047 GEN_INT (temp1)));
3048 emit_constant_insn (cond,
3049 gen_addsi3 (target, new_src,
3050 GEN_INT (-temp2)));
3053 return 2;
3057 /* See if we can generate this by setting the bottom (or the top)
3058 16 bits, and then shifting these into the other half of the
3059 word. We only look for the simplest cases; to do more would cost
3060 too much. Be careful, however, not to generate this when the
3061 alternative would take fewer insns. */
3062 if (val & 0xffff0000)
3064 temp1 = remainder & 0xffff0000;
3065 temp2 = remainder & 0x0000ffff;
3067 /* Overlaps outside this range are best done using other methods. */
3068 for (i = 9; i < 24; i++)
3070 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3071 && !const_ok_for_arm (temp2))
3073 rtx new_src = (subtargets
3074 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3075 : target);
3076 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3077 source, subtargets, generate);
3078 source = new_src;
3079 if (generate)
3080 emit_constant_insn
3081 (cond,
3082 gen_rtx_SET
3083 (VOIDmode, target,
3084 gen_rtx_IOR (mode,
3085 gen_rtx_ASHIFT (mode, source,
3086 GEN_INT (i)),
3087 source)));
3088 return insns + 1;
3092 /* Don't duplicate cases already considered. */
3093 for (i = 17; i < 24; i++)
3095 if (((temp1 | (temp1 >> i)) == remainder)
3096 && !const_ok_for_arm (temp1))
3098 rtx new_src = (subtargets
3099 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3100 : target);
3101 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3102 source, subtargets, generate);
3103 source = new_src;
3104 if (generate)
3105 emit_constant_insn
3106 (cond,
3107 gen_rtx_SET (VOIDmode, target,
3108 gen_rtx_IOR
3109 (mode,
3110 gen_rtx_LSHIFTRT (mode, source,
3111 GEN_INT (i)),
3112 source)));
3113 return insns + 1;
3117 break;
3119 case IOR:
3120 case XOR:
3121 /* If we have IOR or XOR, and the constant can be loaded in a
3122 single instruction, and we can find a temporary to put it in,
3123 then this can be done in two instructions instead of 3-4. */
3124 if (subtargets
3125 /* TARGET can't be NULL if SUBTARGETS is 0 */
3126 || (reload_completed && !reg_mentioned_p (target, source)))
3128 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3130 if (generate)
3132 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3134 emit_constant_insn (cond,
3135 gen_rtx_SET (VOIDmode, sub,
3136 GEN_INT (val)));
3137 emit_constant_insn (cond,
3138 gen_rtx_SET (VOIDmode, target,
3139 gen_rtx_fmt_ee (code, mode,
3140 source, sub)));
3142 return 2;
3146 if (code == XOR)
3147 break;
3149 /* Convert.
3150 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3151 and the remaining bits 0, e.g. 0xfff00000)
3152 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3154 This can be done in 2 instructions by using shifts with mov or mvn.
3155 e.g. for
3156 x = x | 0xfff00000;
3157 we generate:
3158 mvn r0, r0, asl #12
3159 mvn r0, r0, lsr #12 */
3160 if (set_sign_bit_copies > 8
3161 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3163 if (generate)
3165 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3166 rtx shift = GEN_INT (set_sign_bit_copies);
3168 emit_constant_insn
3169 (cond,
3170 gen_rtx_SET (VOIDmode, sub,
3171 gen_rtx_NOT (mode,
3172 gen_rtx_ASHIFT (mode,
3173 source,
3174 shift))));
3175 emit_constant_insn
3176 (cond,
3177 gen_rtx_SET (VOIDmode, target,
3178 gen_rtx_NOT (mode,
3179 gen_rtx_LSHIFTRT (mode, sub,
3180 shift))));
3182 return 2;
3185 /* Convert
3186 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3188 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3190 E.g. for r0 = r0 | 0xfff
3191 mvn r0, r0, lsr #12
3192 mvn r0, r0, asl #12
3195 if (set_zero_bit_copies > 8
3196 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3198 if (generate)
3200 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3201 rtx shift = GEN_INT (set_zero_bit_copies);
3203 emit_constant_insn
3204 (cond,
3205 gen_rtx_SET (VOIDmode, sub,
3206 gen_rtx_NOT (mode,
3207 gen_rtx_LSHIFTRT (mode,
3208 source,
3209 shift))));
3210 emit_constant_insn
3211 (cond,
3212 gen_rtx_SET (VOIDmode, target,
3213 gen_rtx_NOT (mode,
3214 gen_rtx_ASHIFT (mode, sub,
3215 shift))));
3217 return 2;
3220 /* This will never be reached for Thumb2 because orn is a valid
3221 instruction. This is for Thumb1 and the ARM 32 bit cases.
3223 x = y | constant (such that ~constant is a valid constant)
3224 Transform this to
3225 x = ~(~y & ~constant).
3227 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3229 if (generate)
3231 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3232 emit_constant_insn (cond,
3233 gen_rtx_SET (VOIDmode, sub,
3234 gen_rtx_NOT (mode, source)));
3235 source = sub;
3236 if (subtargets)
3237 sub = gen_reg_rtx (mode);
3238 emit_constant_insn (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_AND (mode, source,
3241 GEN_INT (temp1))));
3242 emit_constant_insn (cond,
3243 gen_rtx_SET (VOIDmode, target,
3244 gen_rtx_NOT (mode, sub)));
3246 return 3;
3248 break;
3250 case AND:
3251 /* See if two shifts will do 2 or more insns' worth of work. */
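/* For example (illustrative), "x &= 0x1ff" cannot use an immediate
   (0x1ff needs nine bits), but two insns suffice:
       mov  tmp, x, lsl #23
       mov  x, tmp, lsr #23  */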
3252 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3254 HOST_WIDE_INT shift_mask = ((0xffffffff
3255 << (32 - clear_sign_bit_copies))
3256 & 0xffffffff);
3258 if ((remainder | shift_mask) != 0xffffffff)
3260 if (generate)
3262 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3263 insns = arm_gen_constant (AND, mode, cond,
3264 remainder | shift_mask,
3265 new_src, source, subtargets, 1);
3266 source = new_src;
3268 else
3270 rtx targ = subtargets ? NULL_RTX : target;
3271 insns = arm_gen_constant (AND, mode, cond,
3272 remainder | shift_mask,
3273 targ, source, subtargets, 0);
3277 if (generate)
3279 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3280 rtx shift = GEN_INT (clear_sign_bit_copies);
3282 emit_insn (gen_ashlsi3 (new_src, source, shift));
3283 emit_insn (gen_lshrsi3 (target, new_src, shift));
3286 return insns + 2;
3289 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3291 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3293 if ((remainder | shift_mask) != 0xffffffff)
3295 if (generate)
3297 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3299 insns = arm_gen_constant (AND, mode, cond,
3300 remainder | shift_mask,
3301 new_src, source, subtargets, 1);
3302 source = new_src;
3304 else
3306 rtx targ = subtargets ? NULL_RTX : target;
3308 insns = arm_gen_constant (AND, mode, cond,
3309 remainder | shift_mask,
3310 targ, source, subtargets, 0);
3314 if (generate)
3316 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3317 rtx shift = GEN_INT (clear_zero_bit_copies);
3319 emit_insn (gen_lshrsi3 (new_src, source, shift));
3320 emit_insn (gen_ashlsi3 (target, new_src, shift));
3323 return insns + 2;
3326 break;
3328 default:
3329 break;
3332 /* Calculate what the instruction sequences would be if we generated it
3333 normally, negated, or inverted. */
3334 if (code == AND)
3335 /* AND cannot be split into multiple insns, so invert and use BIC. */
3336 insns = 99;
3337 else
3338 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3340 if (can_negate)
3341 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3342 &neg_immediates);
3343 else
3344 neg_insns = 99;
3346 if (can_invert || final_invert)
3347 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3348 &inv_immediates);
3349 else
3350 inv_insns = 99;
3352 immediates = &pos_immediates;
3354 /* Is the negated immediate sequence more efficient? */
3355 if (neg_insns < insns && neg_insns <= inv_insns)
3357 insns = neg_insns;
3358 immediates = &neg_immediates;
3360 else
3361 can_negate = 0;
3363 /* Is the inverted immediate sequence more efficient?
3364 We must allow for an extra NOT instruction for XOR operations, although
3365 there is some chance that the final 'mvn' will get optimized later. */
3366 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3368 insns = inv_insns;
3369 immediates = &inv_immediates;
3371 else
3373 can_invert = 0;
3374 final_invert = 0;
3377 /* Now output the chosen sequence as instructions. */
3378 if (generate)
3380 for (i = 0; i < insns; i++)
3382 rtx new_src, temp1_rtx;
3384 temp1 = immediates->i[i];
3386 if (code == SET || code == MINUS)
3387 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3388 else if ((final_invert || i < (insns - 1)) && subtargets)
3389 new_src = gen_reg_rtx (mode);
3390 else
3391 new_src = target;
3393 if (can_invert)
3394 temp1 = ~temp1;
3395 else if (can_negate)
3396 temp1 = -temp1;
3398 temp1 = trunc_int_for_mode (temp1, mode);
3399 temp1_rtx = GEN_INT (temp1);
3401 if (code == SET)
3403 else if (code == MINUS)
3404 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3405 else
3406 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3408 emit_constant_insn (cond,
3409 gen_rtx_SET (VOIDmode, new_src,
3410 temp1_rtx));
3411 source = new_src;
3413 if (code == SET)
3415 can_negate = can_invert;
3416 can_invert = 0;
3417 code = PLUS;
3419 else if (code == MINUS)
3420 code = PLUS;
3424 if (final_invert)
3426 if (generate)
3427 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3428 gen_rtx_NOT (mode, source)));
3429 insns++;
3432 return insns;
3435 /* Canonicalize a comparison so that we are more likely to recognize it.
3436 This can be done for a few constant compares, where we can make the
3437 immediate value easier to load. */
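/* For instance (illustrative): "x > 0xffffff" cannot load 0x00ffffff
   as an ARM immediate, but rewriting it below as "x >= 0x1000000"
   (GT -> GE with the constant incremented) yields a constant that
   const_ok_for_arm accepts in a single insn.  */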
3439 enum rtx_code
3440 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3442 enum machine_mode mode;
3443 unsigned HOST_WIDE_INT i, maxval;
3445 mode = GET_MODE (*op0);
3446 if (mode == VOIDmode)
3447 mode = GET_MODE (*op1);
3449 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3451 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3452 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3453 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3454 for GTU/LEU in Thumb mode. */
3455 if (mode == DImode)
3457 rtx tem;
3459 if (code == GT || code == LE
3460 || (!TARGET_ARM && (code == GTU || code == LEU)))
3462 /* Missing comparison. First try to use an available
3463 comparison. */
3464 if (GET_CODE (*op1) == CONST_INT)
3466 i = INTVAL (*op1);
3467 switch (code)
3469 case GT:
3470 case LE:
3471 if (i != maxval
3472 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3474 *op1 = GEN_INT (i + 1);
3475 return code == GT ? GE : LT;
3477 break;
3478 case GTU:
3479 case LEU:
3480 if (i != ~((unsigned HOST_WIDE_INT) 0)
3481 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3483 *op1 = GEN_INT (i + 1);
3484 return code == GTU ? GEU : LTU;
3486 break;
3487 default:
3488 gcc_unreachable ();
3492 /* If that did not work, reverse the condition. */
3493 tem = *op0;
3494 *op0 = *op1;
3495 *op1 = tem;
3496 return swap_condition (code);
3499 return code;
3502 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3503 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3504 to facilitate possible combining with a cmp into 'ands'. */
3505 if (mode == SImode
3506 && GET_CODE (*op0) == ZERO_EXTEND
3507 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3508 && GET_MODE (XEXP (*op0, 0)) == QImode
3509 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3510 && subreg_lowpart_p (XEXP (*op0, 0))
3511 && *op1 == const0_rtx)
3512 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3513 GEN_INT (255));
3515 /* Comparisons smaller than DImode. Only adjust comparisons against
3516 an out-of-range constant. */
3517 if (GET_CODE (*op1) != CONST_INT
3518 || const_ok_for_arm (INTVAL (*op1))
3519 || const_ok_for_arm (- INTVAL (*op1)))
3520 return code;
3522 i = INTVAL (*op1);
3524 switch (code)
3526 case EQ:
3527 case NE:
3528 return code;
3530 case GT:
3531 case LE:
3532 if (i != maxval
3533 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3535 *op1 = GEN_INT (i + 1);
3536 return code == GT ? GE : LT;
3538 break;
3540 case GE:
3541 case LT:
3542 if (i != ~maxval
3543 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3545 *op1 = GEN_INT (i - 1);
3546 return code == GE ? GT : LE;
3548 break;
3550 case GTU:
3551 case LEU:
3552 if (i != ~((unsigned HOST_WIDE_INT) 0)
3553 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3555 *op1 = GEN_INT (i + 1);
3556 return code == GTU ? GEU : LTU;
3558 break;
3560 case GEU:
3561 case LTU:
3562 if (i != 0
3563 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3565 *op1 = GEN_INT (i - 1);
3566 return code == GEU ? GTU : LEU;
3568 break;
3570 default:
3571 gcc_unreachable ();
3574 return code;
3578 /* Define how to find the value returned by a function. */
3580 static rtx
3581 arm_function_value(const_tree type, const_tree func,
3582 bool outgoing ATTRIBUTE_UNUSED)
3584 enum machine_mode mode;
3585 int unsignedp ATTRIBUTE_UNUSED;
3586 rtx r ATTRIBUTE_UNUSED;
3588 mode = TYPE_MODE (type);
3590 if (TARGET_AAPCS_BASED)
3591 return aapcs_allocate_return_reg (mode, type, func);
3593 /* Promote integer types. */
3594 if (INTEGRAL_TYPE_P (type))
3595 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3597 /* Promote small structs returned in a register to full-word size
3598 for big-endian AAPCS. */
3599 if (arm_return_in_msb (type))
3601 HOST_WIDE_INT size = int_size_in_bytes (type);
3602 if (size % UNITS_PER_WORD != 0)
3604 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3605 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3609 return arm_libcall_value_1 (mode);
3612 static int
3613 libcall_eq (const void *p1, const void *p2)
3615 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3618 static hashval_t
3619 libcall_hash (const void *p1)
3621 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3624 static void
3625 add_libcall (htab_t htab, rtx libcall)
3627 *htab_find_slot (htab, libcall, INSERT) = libcall;
3630 static bool
3631 arm_libcall_uses_aapcs_base (const_rtx libcall)
3633 static bool init_done = false;
3634 static htab_t libcall_htab;
3636 if (!init_done)
3638 init_done = true;
3640 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3641 NULL);
3642 add_libcall (libcall_htab,
3643 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3644 add_libcall (libcall_htab,
3645 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3651 add_libcall (libcall_htab,
3652 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3653 add_libcall (libcall_htab,
3654 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3655 add_libcall (libcall_htab,
3656 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3657 add_libcall (libcall_htab,
3658 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3660 add_libcall (libcall_htab,
3661 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3668 add_libcall (libcall_htab,
3669 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3670 add_libcall (libcall_htab,
3671 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3672 add_libcall (libcall_htab,
3673 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3674 add_libcall (libcall_htab,
3675 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3677 /* Values from double-precision helper functions are returned in core
3678 registers if the selected core only supports single-precision
3679 arithmetic, even if we are using the hard-float ABI. The same is
3680 true for single-precision helpers, but we will never be using the
3681 hard-float ABI on a CPU which doesn't support single-precision
3682 operations in hardware. */
3683 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3684 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3685 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3686 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3687 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3688 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3689 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3690 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3691 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3692 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3693 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3694 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3695 SFmode));
3696 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3697 DFmode));
3700 return libcall && htab_find (libcall_htab, libcall) != NULL;
3703 static rtx
3704 arm_libcall_value_1 (enum machine_mode mode)
3706 if (TARGET_AAPCS_BASED)
3707 return aapcs_libcall_value (mode);
3708 else if (TARGET_IWMMXT_ABI
3709 && arm_vector_mode_supported_p (mode))
3710 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3711 else
3712 return gen_rtx_REG (mode, ARG_REGISTER (1));
3715 /* Define how to find the value returned by a library function
3716 assuming the value has mode MODE. */
3718 static rtx
3719 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3721 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3722 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3724 /* The following libcalls return their result in integer registers,
3725 even though they return a floating point value. */
3726 if (arm_libcall_uses_aapcs_base (libcall))
3727 return gen_rtx_REG (mode, ARG_REGISTER(1));
3731 return arm_libcall_value_1 (mode);
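/* Editorial example (not part of the original source; the helper name
   below follows the usual ARM RTABI conventions and is given purely for
   illustration).  When compiling for a single-precision-only FPU
   (e.g. -mfpu=fpv4-sp-d16) with -mfloat-abi=hard, a DFmode addition is
   expanded as a call to the __aeabi_dadd helper.  That helper is in the
   table built by arm_libcall_uses_aapcs_base, so arm_libcall_value
   reports its result in r0/r1 (the base AAPCS location) rather than in
   d0, matching the library implementation.  */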
3734 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3736 static bool
3737 arm_function_value_regno_p (const unsigned int regno)
3739 if (regno == ARG_REGISTER (1)
3740 || (TARGET_32BIT
3741 && TARGET_AAPCS_BASED
3742 && TARGET_VFP
3743 && TARGET_HARD_FLOAT
3744 && regno == FIRST_VFP_REGNUM)
3745 || (TARGET_IWMMXT_ABI
3746 && regno == FIRST_IWMMXT_REGNUM))
3747 return true;
3749 return false;
3752 /* Determine the amount of memory needed to store the possible return
3753 registers of an untyped call. */
3755 arm_apply_result_size (void)
3757 int size = 16;
3759 if (TARGET_32BIT)
3761 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3762 size += 32;
3763 if (TARGET_IWMMXT_ABI)
3764 size += 8;
3767 return size;
3770 /* Decide whether TYPE should be returned in memory (true)
3771 or in a register (false). FNTYPE is the type of the function making
3772 the call. */
3773 static bool
3774 arm_return_in_memory (const_tree type, const_tree fntype)
3776 HOST_WIDE_INT size;
3778 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3780 if (TARGET_AAPCS_BASED)
3782 /* Simple, non-aggregate types (i.e. not including vectors and
3783 complex) are always returned in a register (or registers).
3784 We don't care about which register here, so we can short-cut
3785 some of the detail. */
3786 if (!AGGREGATE_TYPE_P (type)
3787 && TREE_CODE (type) != VECTOR_TYPE
3788 && TREE_CODE (type) != COMPLEX_TYPE)
3789 return false;
3791 /* Any return value that is no larger than one word can be
3792 returned in r0. */
3793 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3794 return false;
3796 /* Check any available co-processors to see if they accept the
3797 type as a register candidate (VFP, for example, can return
3798 some aggregates in consecutive registers). These aren't
3799 available if the call is variadic. */
3800 if (aapcs_select_return_coproc (type, fntype) >= 0)
3801 return false;
3803 /* Vector values should be returned using ARM registers, not
3804 memory (unless they're over 16 bytes, which will break since
3805 we only have four call-clobbered registers to play with). */
3806 if (TREE_CODE (type) == VECTOR_TYPE)
3807 return (size < 0 || size > (4 * UNITS_PER_WORD));
3809 /* The rest go in memory. */
3810 return true;
3813 if (TREE_CODE (type) == VECTOR_TYPE)
3814 return (size < 0 || size > (4 * UNITS_PER_WORD));
3816 if (!AGGREGATE_TYPE_P (type) &&
3817 (TREE_CODE (type) != VECTOR_TYPE))
3818 /* All simple types are returned in registers. */
3819 return false;
3821 if (arm_abi != ARM_ABI_APCS)
3823 /* ATPCS and later return aggregate types in memory only if they are
3824 larger than a word (or are variable size). */
3825 return (size < 0 || size > UNITS_PER_WORD);
3828 /* For the arm-wince targets we choose to be compatible with Microsoft's
3829 ARM and Thumb compilers, which always return aggregates in memory. */
3830 #ifndef ARM_WINCE
3831 /* All structures/unions bigger than one word are returned in memory.
3832 Also catch the case where int_size_in_bytes returns -1. In this case
3833 the aggregate is either huge or of variable size, and in either case
3834 we will want to return it via memory and not in a register. */
3835 if (size < 0 || size > UNITS_PER_WORD)
3836 return true;
3838 if (TREE_CODE (type) == RECORD_TYPE)
3840 tree field;
3842 /* For a struct the APCS says that we only return in a register
3843 if the type is 'integer like' and every addressable element
3844 has an offset of zero. For practical purposes this means
3845 that the structure can have at most one non bit-field element
3846 and that this element must be the first one in the structure. */
3848 /* Find the first field, ignoring non FIELD_DECL things which will
3849 have been created by C++. */
3850 for (field = TYPE_FIELDS (type);
3851 field && TREE_CODE (field) != FIELD_DECL;
3852 field = DECL_CHAIN (field))
3853 continue;
3855 if (field == NULL)
3856 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3858 /* Check that the first field is valid for returning in a register. */
3860 /* ... Floats are not allowed */
3861 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3862 return true;
3864 /* ... Aggregates that are not themselves valid for returning in
3865 a register are not allowed. */
3866 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3867 return true;
3869 /* Now check the remaining fields, if any. Only bitfields are allowed,
3870 since they are not addressable. */
3871 for (field = DECL_CHAIN (field);
3872 field;
3873 field = DECL_CHAIN (field))
3875 if (TREE_CODE (field) != FIELD_DECL)
3876 continue;
3878 if (!DECL_BIT_FIELD_TYPE (field))
3879 return true;
3882 return false;
3885 if (TREE_CODE (type) == UNION_TYPE)
3887 tree field;
3889 /* Unions can be returned in registers if every element is
3890 integral, or can be returned in an integer register. */
3891 for (field = TYPE_FIELDS (type);
3892 field;
3893 field = DECL_CHAIN (field))
3895 if (TREE_CODE (field) != FIELD_DECL)
3896 continue;
3898 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3899 return true;
3901 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3902 return true;
3905 return false;
3907 #endif /* not ARM_WINCE */
3909 /* Return all other types in memory. */
3910 return true;
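/* Editorial example (not part of the original source).  Under the AAPCS
   rules implemented above, and assuming no co-processor return slot
   accepts these types:

       struct s1 { int a; };              // 4 bytes: returned in r0
       struct s2 { int a, b; };           // 8 bytes: returned in memory,
                                          // address supplied by the caller
       typedef int v2si __attribute__((vector_size (8)));
       v2si get_v (void);                 // 8-byte vector: r0/r1

   Under the old APCS rules, s1 is returned in a register only because it
   is "integer like"; a single-member struct containing a float instead
   would go to memory.  */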
3913 const struct pcs_attribute_arg
3915 const char *arg;
3916 enum arm_pcs value;
3917 } pcs_attribute_args[] =
3919 {"aapcs", ARM_PCS_AAPCS},
3920 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3921 #if 0
3922 /* We could recognize these, but changes would be needed elsewhere
3923 * to implement them. */
3924 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3925 {"atpcs", ARM_PCS_ATPCS},
3926 {"apcs", ARM_PCS_APCS},
3927 #endif
3928 {NULL, ARM_PCS_UNKNOWN}
3931 static enum arm_pcs
3932 arm_pcs_from_attribute (tree attr)
3934 const struct pcs_attribute_arg *ptr;
3935 const char *arg;
3937 /* Get the value of the argument. */
3938 if (TREE_VALUE (attr) == NULL_TREE
3939 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3940 return ARM_PCS_UNKNOWN;
3942 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3944 /* Check it against the list of known arguments. */
3945 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3946 if (streq (arg, ptr->arg))
3947 return ptr->value;
3949 /* An unrecognized PCS variant. */
3950 return ARM_PCS_UNKNOWN;
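/* Editorial example (not part of the original source).  The strings above
   correspond to the user-visible "pcs" function attribute, e.g.:

       double interp (double x) __attribute__((pcs ("aapcs")));

   forces INTERP to take and return its double in core registers (the
   base variant) even on an -mfloat-abi=hard build, while

       float dot (float a, float b) __attribute__((pcs ("aapcs-vfp")));

   requests the VFP variant explicitly.  */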
3953 /* Get the PCS variant to use for this call. TYPE is the function's type
3954 specification, DECL is the specific declaration. DECL may be null if
3955 the call could be indirect or if this is a library call. */
3956 static enum arm_pcs
3957 arm_get_pcs_model (const_tree type, const_tree decl)
3959 bool user_convention = false;
3960 enum arm_pcs user_pcs = arm_pcs_default;
3961 tree attr;
3963 gcc_assert (type);
3965 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3966 if (attr)
3968 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3969 user_convention = true;
3972 if (TARGET_AAPCS_BASED)
3974 /* Detect varargs functions. These always use the base rules
3975 (no argument is ever a candidate for a co-processor
3976 register). */
3977 bool base_rules = stdarg_p (type);
3979 if (user_convention)
3981 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3982 sorry ("non-AAPCS derived PCS variant");
3983 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3984 error ("variadic functions must use the base AAPCS variant");
3987 if (base_rules)
3988 return ARM_PCS_AAPCS;
3989 else if (user_convention)
3990 return user_pcs;
3991 else if (decl && flag_unit_at_a_time)
3993 /* Local functions never leak outside this compilation unit,
3994 so we are free to use whatever conventions are
3995 appropriate. */
3996 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3997 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3998 if (i && i->local)
3999 return ARM_PCS_AAPCS_LOCAL;
4002 else if (user_convention && user_pcs != arm_pcs_default)
4003 sorry ("PCS variant");
4005 /* For everything else we use the target's default. */
4006 return arm_pcs_default;
4010 static void
4011 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4012 const_tree fntype ATTRIBUTE_UNUSED,
4013 rtx libcall ATTRIBUTE_UNUSED,
4014 const_tree fndecl ATTRIBUTE_UNUSED)
4016 /* Record the unallocated VFP registers. */
4017 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4018 pcum->aapcs_vfp_reg_alloc = 0;
4021 /* Walk down the type tree of TYPE counting consecutive base elements.
4022 If *MODEP is VOIDmode, then set it to the first valid floating point
4023 type. If a non-floating point type is found, or if a floating point
4024 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
4025 otherwise return the count in the sub-tree. */
4026 static int
4027 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4029 enum machine_mode mode;
4030 HOST_WIDE_INT size;
4032 switch (TREE_CODE (type))
4034 case REAL_TYPE:
4035 mode = TYPE_MODE (type);
4036 if (mode != DFmode && mode != SFmode)
4037 return -1;
4039 if (*modep == VOIDmode)
4040 *modep = mode;
4042 if (*modep == mode)
4043 return 1;
4045 break;
4047 case COMPLEX_TYPE:
4048 mode = TYPE_MODE (TREE_TYPE (type));
4049 if (mode != DFmode && mode != SFmode)
4050 return -1;
4052 if (*modep == VOIDmode)
4053 *modep = mode;
4055 if (*modep == mode)
4056 return 2;
4058 break;
4060 case VECTOR_TYPE:
4061 /* Use V2SImode and V4SImode as representatives of all 64-bit
4062 and 128-bit vector types, whether or not those modes are
4063 supported with the present options. */
4064 size = int_size_in_bytes (type);
4065 switch (size)
4067 case 8:
4068 mode = V2SImode;
4069 break;
4070 case 16:
4071 mode = V4SImode;
4072 break;
4073 default:
4074 return -1;
4077 if (*modep == VOIDmode)
4078 *modep = mode;
4080 /* Vector modes are considered to be opaque: two vectors are
4081 equivalent for the purposes of being homogeneous aggregates
4082 if they are the same size. */
4083 if (*modep == mode)
4084 return 1;
4086 break;
4088 case ARRAY_TYPE:
4090 int count;
4091 tree index = TYPE_DOMAIN (type);
4093 /* Can't handle incomplete types. */
4094 if (!COMPLETE_TYPE_P(type))
4095 return -1;
4097 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4098 if (count == -1
4099 || !index
4100 || !TYPE_MAX_VALUE (index)
4101 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4102 || !TYPE_MIN_VALUE (index)
4103 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4104 || count < 0)
4105 return -1;
4107 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4108 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4110 /* There must be no padding. */
4111 if (!host_integerp (TYPE_SIZE (type), 1)
4112 || (tree_low_cst (TYPE_SIZE (type), 1)
4113 != count * GET_MODE_BITSIZE (*modep)))
4114 return -1;
4116 return count;
4119 case RECORD_TYPE:
4121 int count = 0;
4122 int sub_count;
4123 tree field;
4125 /* Can't handle incomplete types. */
4126 if (!COMPLETE_TYPE_P(type))
4127 return -1;
4129 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4131 if (TREE_CODE (field) != FIELD_DECL)
4132 continue;
4134 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4135 if (sub_count < 0)
4136 return -1;
4137 count += sub_count;
4140 /* There must be no padding. */
4141 if (!host_integerp (TYPE_SIZE (type), 1)
4142 || (tree_low_cst (TYPE_SIZE (type), 1)
4143 != count * GET_MODE_BITSIZE (*modep)))
4144 return -1;
4146 return count;
4149 case UNION_TYPE:
4150 case QUAL_UNION_TYPE:
4152 /* These aren't very interesting except in a degenerate case. */
4153 int count = 0;
4154 int sub_count;
4155 tree field;
4157 /* Can't handle incomplete types. */
4158 if (!COMPLETE_TYPE_P(type))
4159 return -1;
4161 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4163 if (TREE_CODE (field) != FIELD_DECL)
4164 continue;
4166 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4167 if (sub_count < 0)
4168 return -1;
4169 count = count > sub_count ? count : sub_count;
4172 /* There must be no padding. */
4173 if (!host_integerp (TYPE_SIZE (type), 1)
4174 || (tree_low_cst (TYPE_SIZE (type), 1)
4175 != count * GET_MODE_BITSIZE (*modep)))
4176 return -1;
4178 return count;
4181 default:
4182 break;
4185 return -1;
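/* Editorial example (not part of the original source).  For the walk
   above:

       struct hfa  { double x, y; };          // *modep = DFmode, count 2
       struct vec4 { float v[4]; };           // *modep = SFmode, count 4
       struct mix  { float f; double d; };    // -1: element modes differ

   so HFA and VEC4 are homogeneous aggregates usable by the VFP argument
   and return rules below, while MIX falls back to the base AAPCS rules.  */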
4188 /* Return true if PCS_VARIANT should use VFP registers. */
4189 static bool
4190 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4192 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4194 static bool seen_thumb1_vfp = false;
4196 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4198 sorry ("Thumb-1 hard-float VFP ABI");
4199 /* sorry() is not immediately fatal, so only display this once. */
4200 seen_thumb1_vfp = true;
4203 return true;
4206 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4207 return false;
4209 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4210 (TARGET_VFP_DOUBLE || !is_double));
4213 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4214 suitable for passing or returning in VFP registers for the PCS
4215 variant selected. If it is, then *BASE_MODE is updated to contain
4216 a machine mode describing each element of the argument's type and
4217 *COUNT to hold the number of such elements. */
4218 static bool
4219 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4220 enum machine_mode mode, const_tree type,
4221 enum machine_mode *base_mode, int *count)
4223 enum machine_mode new_mode = VOIDmode;
4225 /* If we have the type information, prefer that to working things
4226 out from the mode. */
4227 if (type)
4229 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4231 if (ag_count > 0 && ag_count <= 4)
4232 *count = ag_count;
4233 else
4234 return false;
4236 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4237 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4238 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4240 *count = 1;
4241 new_mode = mode;
4243 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4245 *count = 2;
4246 new_mode = (mode == DCmode ? DFmode : SFmode);
4248 else
4249 return false;
4252 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4253 return false;
4255 *base_mode = new_mode;
4256 return true;
4259 static bool
4260 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4261 enum machine_mode mode, const_tree type)
4263 int count ATTRIBUTE_UNUSED;
4264 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4266 if (!use_vfp_abi (pcs_variant, false))
4267 return false;
4268 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4269 &ag_mode, &count);
4272 static bool
4273 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4274 const_tree type)
4276 if (!use_vfp_abi (pcum->pcs_variant, false))
4277 return false;
4279 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4280 &pcum->aapcs_vfp_rmode,
4281 &pcum->aapcs_vfp_rcount);
4284 static bool
4285 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4286 const_tree type ATTRIBUTE_UNUSED)
4288 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4289 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4290 int regno;
4292 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4293 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4295 pcum->aapcs_vfp_reg_alloc = mask << regno;
4296 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4298 int i;
4299 int rcount = pcum->aapcs_vfp_rcount;
4300 int rshift = shift;
4301 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4302 rtx par;
4303 if (!TARGET_NEON)
4305 /* Avoid using unsupported vector modes. */
4306 if (rmode == V2SImode)
4307 rmode = DImode;
4308 else if (rmode == V4SImode)
4310 rmode = DImode;
4311 rcount *= 2;
4312 rshift /= 2;
4315 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4316 for (i = 0; i < rcount; i++)
4318 rtx tmp = gen_rtx_REG (rmode,
4319 FIRST_VFP_REGNUM + regno + i * rshift);
4320 tmp = gen_rtx_EXPR_LIST
4321 (VOIDmode, tmp,
4322 GEN_INT (i * GET_MODE_SIZE (rmode)));
4323 XVECEXP (par, 0, i) = tmp;
4326 pcum->aapcs_reg = par;
4328 else
4329 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4330 return true;
4332 return false;
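/* Editorial example (not part of the original source): a worked instance
   of the allocation loop above.  Because the scan always restarts at s0,
   it gives the AAPCS "back-fill" behaviour:

       void f (float a, double b, float c);

   A takes s0 (shift 1, mask 0b1); B needs two consecutive single
   registers starting on an even boundary (shift 2, mask 0b11) and s0 is
   busy, so it lands in s2/s3 (d1); C then back-fills the hole in s1.  */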
4335 static rtx
4336 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4337 enum machine_mode mode,
4338 const_tree type ATTRIBUTE_UNUSED)
4340 if (!use_vfp_abi (pcs_variant, false))
4341 return NULL;
4343 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4345 int count;
4346 enum machine_mode ag_mode;
4347 int i;
4348 rtx par;
4349 int shift;
4351 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4352 &ag_mode, &count);
4354 if (!TARGET_NEON)
4356 if (ag_mode == V2SImode)
4357 ag_mode = DImode;
4358 else if (ag_mode == V4SImode)
4360 ag_mode = DImode;
4361 count *= 2;
4364 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4365 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4366 for (i = 0; i < count; i++)
4368 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4369 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4370 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4371 XVECEXP (par, 0, i) = tmp;
4374 return par;
4377 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4380 static void
4381 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4382 enum machine_mode mode ATTRIBUTE_UNUSED,
4383 const_tree type ATTRIBUTE_UNUSED)
4385 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4386 pcum->aapcs_vfp_reg_alloc = 0;
4387 return;
4390 #define AAPCS_CP(X) \
4392 aapcs_ ## X ## _cum_init, \
4393 aapcs_ ## X ## _is_call_candidate, \
4394 aapcs_ ## X ## _allocate, \
4395 aapcs_ ## X ## _is_return_candidate, \
4396 aapcs_ ## X ## _allocate_return_reg, \
4397 aapcs_ ## X ## _advance \
4400 /* Table of co-processors that can be used to pass arguments in
4401 registers. Ideally no argument should be a candidate for more than
4402 one co-processor table entry, but the table is processed in order
4403 and stops after the first match. If that entry then fails to put
4404 the argument into a co-processor register, the argument will go on
4405 the stack. */
4406 static struct
4408 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4409 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4411 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4412 BLKmode) is a candidate for this co-processor's registers; this
4413 function should ignore any position-dependent state in
4414 CUMULATIVE_ARGS and only use call-type dependent information. */
4415 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4417 /* Return true if the argument does get a co-processor register; it
4418 should set aapcs_reg to an RTX of the register allocated as is
4419 required for a return from FUNCTION_ARG. */
4420 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4422 /* Return true if a result of mode MODE (or type TYPE if MODE is
4423 BLKmode) can be returned in this co-processor's registers. */
4424 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4426 /* Allocate and return an RTX element to hold the return value of a
4427 call; this routine must not fail and will only be called if
4428 is_return_candidate returned true with the same parameters. */
4429 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4431 /* Finish processing this argument and prepare to start processing
4432 the next one. */
4433 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4434 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4436 AAPCS_CP(vfp)
4439 #undef AAPCS_CP
4441 static int
4442 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4443 const_tree type)
4445 int i;
4447 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4448 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4449 return i;
4451 return -1;
4454 static int
4455 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4457 /* We aren't passed a decl, so we can't check that a call is local.
4458 However, it isn't clear that that would be a win anyway, since it
4459 might limit some tail-calling opportunities. */
4460 enum arm_pcs pcs_variant;
4462 if (fntype)
4464 const_tree fndecl = NULL_TREE;
4466 if (TREE_CODE (fntype) == FUNCTION_DECL)
4468 fndecl = fntype;
4469 fntype = TREE_TYPE (fntype);
4472 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4474 else
4475 pcs_variant = arm_pcs_default;
4477 if (pcs_variant != ARM_PCS_AAPCS)
4479 int i;
4481 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4482 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4483 TYPE_MODE (type),
4484 type))
4485 return i;
4487 return -1;
4490 static rtx
4491 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4492 const_tree fntype)
4494 /* We aren't passed a decl, so we can't check that a call is local.
4495 However, it isn't clear that that would be a win anyway, since it
4496 might limit some tail-calling opportunities. */
4497 enum arm_pcs pcs_variant;
4498 int unsignedp ATTRIBUTE_UNUSED;
4500 if (fntype)
4502 const_tree fndecl = NULL_TREE;
4504 if (TREE_CODE (fntype) == FUNCTION_DECL)
4506 fndecl = fntype;
4507 fntype = TREE_TYPE (fntype);
4510 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4512 else
4513 pcs_variant = arm_pcs_default;
4515 /* Promote integer types. */
4516 if (type && INTEGRAL_TYPE_P (type))
4517 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4519 if (pcs_variant != ARM_PCS_AAPCS)
4521 int i;
4523 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4524 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4525 type))
4526 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4527 mode, type);
4530 /* Promote small structs returned in a register to full-word size
4531 for big-endian AAPCS. */
4532 if (type && arm_return_in_msb (type))
4534 HOST_WIDE_INT size = int_size_in_bytes (type);
4535 if (size % UNITS_PER_WORD != 0)
4537 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4538 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4542 return gen_rtx_REG (mode, R0_REGNUM);
4545 static rtx
4546 aapcs_libcall_value (enum machine_mode mode)
4548 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4549 && GET_MODE_SIZE (mode) <= 4)
4550 mode = SImode;
4552 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4555 /* Lay out a function argument using the AAPCS rules. The rule
4556 numbers referred to here are those in the AAPCS. */
4557 static void
4558 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4559 const_tree type, bool named)
4561 int nregs, nregs2;
4562 int ncrn;
4564 /* We only need to do this once per argument. */
4565 if (pcum->aapcs_arg_processed)
4566 return;
4568 pcum->aapcs_arg_processed = true;
4570 /* Special case: if named is false then we are handling an incoming
4571 anonymous argument which is on the stack. */
4572 if (!named)
4573 return;
4575 /* Is this a potential co-processor register candidate? */
4576 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4578 int slot = aapcs_select_call_coproc (pcum, mode, type);
4579 pcum->aapcs_cprc_slot = slot;
4581 /* We don't have to apply any of the rules from part B of the
4582 preparation phase; these are handled elsewhere in the
4583 compiler. */
4585 if (slot >= 0)
4587 /* A Co-processor register candidate goes either in its own
4588 class of registers or on the stack. */
4589 if (!pcum->aapcs_cprc_failed[slot])
4591 /* C1.cp - Try to allocate the argument to co-processor
4592 registers. */
4593 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4594 return;
4596 /* C2.cp - Put the argument on the stack and note that we
4597 can't assign any more candidates in this slot. We also
4598 need to note that we have allocated stack space, so that
4599 we won't later try to split a non-cprc candidate between
4600 core registers and the stack. */
4601 pcum->aapcs_cprc_failed[slot] = true;
4602 pcum->can_split = false;
4605 /* We didn't get a register, so this argument goes on the
4606 stack. */
4607 gcc_assert (pcum->can_split == false);
4608 return;
4612 /* C3 - For double-word aligned arguments, round the NCRN up to the
4613 next even number. */
4614 ncrn = pcum->aapcs_ncrn;
4615 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4616 ncrn++;
4618 nregs = ARM_NUM_REGS2(mode, type);
4620 /* Sigh, this test should really assert that nregs > 0, but a GCC
4621 extension allows empty structs and then gives them empty size; it
4622 then allows such a structure to be passed by value. For some of
4623 the code below we have to pretend that such an argument has
4624 non-zero size so that we 'locate' it correctly either in
4625 registers or on the stack. */
4626 gcc_assert (nregs >= 0);
4628 nregs2 = nregs ? nregs : 1;
4630 /* C4 - Argument fits entirely in core registers. */
4631 if (ncrn + nregs2 <= NUM_ARG_REGS)
4633 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4634 pcum->aapcs_next_ncrn = ncrn + nregs;
4635 return;
4638 /* C5 - Some core registers left and there are no arguments already
4639 on the stack: split this argument between the remaining core
4640 registers and the stack. */
4641 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4643 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4644 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4645 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4646 return;
4649 /* C6 - NCRN is set to 4. */
4650 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4652 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4653 return;
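/* Editorial example (not part of the original source): worked instances
   of rules C3-C5 above for the base variant.

       void f (int a, long long b);        // a -> r0; C3 rounds the NCRN
                                           // from 1 up to 2, so b -> r2/r3
       struct s { int x[3]; };
       void g (int a, int b, struct s c);  // a -> r0, b -> r1; C needs
                                           // three words, so C5 splits it:
                                           // the first 8 bytes go in r2/r3
                                           // and the last word on the
                                           // stack.  */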
4656 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4657 for a call to a function whose data type is FNTYPE.
4658 For a library call, FNTYPE is NULL. */
4659 void
4660 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4661 rtx libname,
4662 tree fndecl ATTRIBUTE_UNUSED)
4664 /* Work out which PCS variant applies to this call. */
4665 if (fntype)
4666 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4667 else
4668 pcum->pcs_variant = arm_pcs_default;
4670 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4672 if (arm_libcall_uses_aapcs_base (libname))
4673 pcum->pcs_variant = ARM_PCS_AAPCS;
4675 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4676 pcum->aapcs_reg = NULL_RTX;
4677 pcum->aapcs_partial = 0;
4678 pcum->aapcs_arg_processed = false;
4679 pcum->aapcs_cprc_slot = -1;
4680 pcum->can_split = true;
4682 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4684 int i;
4686 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4688 pcum->aapcs_cprc_failed[i] = false;
4689 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4692 return;
4695 /* Legacy ABIs */
4697 /* On the ARM, the offset starts at 0. */
4698 pcum->nregs = 0;
4699 pcum->iwmmxt_nregs = 0;
4700 pcum->can_split = true;
4702 /* Varargs vectors are treated the same as long long.
4703 named_count avoids having to change the way arm handles 'named'. */
4704 pcum->named_count = 0;
4705 pcum->nargs = 0;
4707 if (TARGET_REALLY_IWMMXT && fntype)
4709 tree fn_arg;
4711 for (fn_arg = TYPE_ARG_TYPES (fntype);
4712 fn_arg;
4713 fn_arg = TREE_CHAIN (fn_arg))
4714 pcum->named_count += 1;
4716 if (! pcum->named_count)
4717 pcum->named_count = INT_MAX;
4722 /* Return true if mode/type need doubleword alignment. */
4723 static bool
4724 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4726 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4727 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4731 /* Determine where to put an argument to a function.
4732 Value is zero to push the argument on the stack,
4733 or a hard register in which to store the argument.
4735 MODE is the argument's machine mode.
4736 TYPE is the data type of the argument (as a tree).
4737 This is null for libcalls where that information may
4738 not be available.
4739 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4740 the preceding args and about the function being called.
4741 NAMED is nonzero if this argument is a named parameter
4742 (otherwise it is an extra parameter matching an ellipsis).
4744 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4745 other arguments are passed on the stack. If (NAMED == 0) (which happens
4746 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4747 defined), say it is passed on the stack (function_prologue will
4748 indeed make it be passed on the stack if necessary). */
4750 static rtx
4751 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4752 const_tree type, bool named)
4754 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4755 int nregs;
4757 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4758 a call insn (op3 of a call_value insn). */
4759 if (mode == VOIDmode)
4760 return const0_rtx;
4762 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4764 aapcs_layout_arg (pcum, mode, type, named);
4765 return pcum->aapcs_reg;
4768 /* Varargs vectors are treated the same as long long.
4769 named_count avoids having to change the way arm handles 'named'. */
4770 if (TARGET_IWMMXT_ABI
4771 && arm_vector_mode_supported_p (mode)
4772 && pcum->named_count > pcum->nargs + 1)
4774 if (pcum->iwmmxt_nregs <= 9)
4775 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4776 else
4778 pcum->can_split = false;
4779 return NULL_RTX;
4783 /* Put doubleword aligned quantities in even register pairs. */
4784 if (pcum->nregs & 1
4785 && ARM_DOUBLEWORD_ALIGN
4786 && arm_needs_doubleword_align (mode, type))
4787 pcum->nregs++;
4789 /* Only allow splitting an arg between regs and memory if all preceding
4790 args were allocated to regs. For args passed by reference we only count
4791 the reference pointer. */
4792 if (pcum->can_split)
4793 nregs = 1;
4794 else
4795 nregs = ARM_NUM_REGS2 (mode, type);
4797 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4798 return NULL_RTX;
4800 return gen_rtx_REG (mode, pcum->nregs);
4803 static unsigned int
4804 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4806 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4807 ? DOUBLEWORD_ALIGNMENT
4808 : PARM_BOUNDARY);
4811 static int
4812 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4813 tree type, bool named)
4815 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4816 int nregs = pcum->nregs;
4818 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4820 aapcs_layout_arg (pcum, mode, type, named);
4821 return pcum->aapcs_partial;
4824 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4825 return 0;
4827 if (NUM_ARG_REGS > nregs
4828 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4829 && pcum->can_split)
4830 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4832 return 0;
4835 /* Update the data in PCUM to advance over an argument
4836 of mode MODE and data type TYPE.
4837 (TYPE is null for libcalls where that information may not be available.) */
4839 static void
4840 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4841 const_tree type, bool named)
4843 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4845 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4847 aapcs_layout_arg (pcum, mode, type, named);
4849 if (pcum->aapcs_cprc_slot >= 0)
4851 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4852 type);
4853 pcum->aapcs_cprc_slot = -1;
4856 /* Generic stuff. */
4857 pcum->aapcs_arg_processed = false;
4858 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4859 pcum->aapcs_reg = NULL_RTX;
4860 pcum->aapcs_partial = 0;
4862 else
4864 pcum->nargs += 1;
4865 if (arm_vector_mode_supported_p (mode)
4866 && pcum->named_count > pcum->nargs
4867 && TARGET_IWMMXT_ABI)
4868 pcum->iwmmxt_nregs += 1;
4869 else
4870 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4874 /* Variable sized types are passed by reference. This is a GCC
4875 extension to the ARM ABI. */
4877 static bool
4878 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4879 enum machine_mode mode ATTRIBUTE_UNUSED,
4880 const_tree type, bool named ATTRIBUTE_UNUSED)
4882 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4885 /* Encode the current state of the #pragma [no_]long_calls. */
4886 typedef enum
4888 OFF, /* No #pragma [no_]long_calls is in effect. */
4889 LONG, /* #pragma long_calls is in effect. */
4890 SHORT /* #pragma no_long_calls is in effect. */
4891 } arm_pragma_enum;
4893 static arm_pragma_enum arm_pragma_long_calls = OFF;
4895 void
4896 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4898 arm_pragma_long_calls = LONG;
4901 void
4902 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4904 arm_pragma_long_calls = SHORT;
4907 void
4908 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4910 arm_pragma_long_calls = OFF;
4913 /* Handle an attribute requiring a FUNCTION_DECL;
4914 arguments as in struct attribute_spec.handler. */
4915 static tree
4916 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4917 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4919 if (TREE_CODE (*node) != FUNCTION_DECL)
4921 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4922 name);
4923 *no_add_attrs = true;
4926 return NULL_TREE;
4929 /* Handle an "interrupt" or "isr" attribute;
4930 arguments as in struct attribute_spec.handler. */
4931 static tree
4932 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4933 bool *no_add_attrs)
4935 if (DECL_P (*node))
4937 if (TREE_CODE (*node) != FUNCTION_DECL)
4939 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4940 name);
4941 *no_add_attrs = true;
4943 /* FIXME: the argument if any is checked for type attributes;
4944 should it be checked for decl ones? */
4946 else
4948 if (TREE_CODE (*node) == FUNCTION_TYPE
4949 || TREE_CODE (*node) == METHOD_TYPE)
4951 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4953 warning (OPT_Wattributes, "%qE attribute ignored",
4954 name);
4955 *no_add_attrs = true;
4958 else if (TREE_CODE (*node) == POINTER_TYPE
4959 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4960 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4961 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4963 *node = build_variant_type_copy (*node);
4964 TREE_TYPE (*node) = build_type_attribute_variant
4965 (TREE_TYPE (*node),
4966 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4967 *no_add_attrs = true;
4969 else
4971 /* Possibly pass this attribute on from the type to a decl. */
4972 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4973 | (int) ATTR_FLAG_FUNCTION_NEXT
4974 | (int) ATTR_FLAG_ARRAY_NEXT))
4976 *no_add_attrs = true;
4977 return tree_cons (name, args, NULL_TREE);
4979 else
4981 warning (OPT_Wattributes, "%qE attribute ignored",
4982 name);
4987 return NULL_TREE;
4990 /* Handle a "pcs" attribute; arguments as in struct
4991 attribute_spec.handler. */
4992 static tree
4993 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4994 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4996 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4998 warning (OPT_Wattributes, "%qE attribute ignored", name);
4999 *no_add_attrs = true;
5001 return NULL_TREE;
5004 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5005 /* Handle the "notshared" attribute. This attribute is another way of
5006 requesting hidden visibility. ARM's compiler supports
5007 "__declspec(notshared)"; we support the same thing via an
5008 attribute. */
5010 static tree
5011 arm_handle_notshared_attribute (tree *node,
5012 tree name ATTRIBUTE_UNUSED,
5013 tree args ATTRIBUTE_UNUSED,
5014 int flags ATTRIBUTE_UNUSED,
5015 bool *no_add_attrs)
5017 tree decl = TYPE_NAME (*node);
5019 if (decl)
5021 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5022 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5023 *no_add_attrs = false;
5025 return NULL_TREE;
5027 #endif
5029 /* Return 0 if the attributes for two types are incompatible, 1 if they
5030 are compatible, and 2 if they are nearly compatible (which causes a
5031 warning to be generated). */
5032 static int
5033 arm_comp_type_attributes (const_tree type1, const_tree type2)
5035 int l1, l2, s1, s2;
5037 /* Check for mismatch of non-default calling convention. */
5038 if (TREE_CODE (type1) != FUNCTION_TYPE)
5039 return 1;
5041 /* Check for mismatched call attributes. */
5042 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5043 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5044 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5045 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5047 /* Only bother to check if an attribute is defined. */
5048 if (l1 | l2 | s1 | s2)
5050 /* If one type has an attribute, the other must have the same attribute. */
5051 if ((l1 != l2) || (s1 != s2))
5052 return 0;
5054 /* Disallow mixed attributes. */
5055 if ((l1 & s2) || (l2 & s1))
5056 return 0;
5059 /* Check for mismatched ISR attribute. */
5060 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5061 if (! l1)
5062 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5063 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5064 if (! l2)
5065 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5066 if (l1 != l2)
5067 return 0;
5069 return 1;
5072 /* Assign default attributes to a newly defined type. This is used to
5073 set short_call/long_call attributes for function types of
5074 functions defined inside corresponding #pragma scopes. */
5075 static void
5076 arm_set_default_type_attributes (tree type)
5078 /* Add __attribute__ ((long_call)) to all functions when inside
5079 #pragma long_calls, or __attribute__ ((short_call)) when inside
5080 #pragma no_long_calls. */
5081 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5083 tree type_attr_list, attr_name;
5084 type_attr_list = TYPE_ATTRIBUTES (type);
5086 if (arm_pragma_long_calls == LONG)
5087 attr_name = get_identifier ("long_call");
5088 else if (arm_pragma_long_calls == SHORT)
5089 attr_name = get_identifier ("short_call");
5090 else
5091 return;
5093 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5094 TYPE_ATTRIBUTES (type) = type_attr_list;
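/* Editorial example (not part of the original source).  The pragma state
   applied above is what makes the following translation-unit idiom work:

       #pragma long_calls
       extern void flash_write (const void *, int);  // gets "long_call"
       #pragma long_calls_off
       extern void fast_copy (void *, const void *, int);  // default again

   Every function type declared while a pragma is in effect picks up the
   corresponding attribute here.  */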
5098 /* Return true if DECL is known to be linked into section SECTION. */
5100 static bool
5101 arm_function_in_section_p (tree decl, section *section)
5103 /* We can only be certain about functions defined in the same
5104 compilation unit. */
5105 if (!TREE_STATIC (decl))
5106 return false;
5108 /* Make sure that SYMBOL always binds to the definition in this
5109 compilation unit. */
5110 if (!targetm.binds_local_p (decl))
5111 return false;
5113 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5114 if (!DECL_SECTION_NAME (decl))
5116 /* Make sure that we will not create a unique section for DECL. */
5117 if (flag_function_sections || DECL_ONE_ONLY (decl))
5118 return false;
5121 return function_section (decl) == section;
5124 /* Return nonzero if a 32-bit "long_call" should be generated for
5125 a call from the current function to DECL. We generate a long_call
5126 if the function:
5128 a. has an __attribute__((long_call))
5129 or b. is within the scope of a #pragma long_calls
5130 or c. the -mlong-calls command line switch has been specified
5132 However we do not generate a long call if the function:
5134 d. has an __attribute__ ((short_call))
5135 or e. is inside the scope of a #pragma no_long_calls
5136 or f. is defined in the same section as the current function. */
5138 bool
5139 arm_is_long_call_p (tree decl)
5141 tree attrs;
5143 if (!decl)
5144 return TARGET_LONG_CALLS;
5146 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5147 if (lookup_attribute ("short_call", attrs))
5148 return false;
5150 /* For "f", be conservative, and only cater for cases in which the
5151 whole of the current function is placed in the same section. */
5152 if (!flag_reorder_blocks_and_partition
5153 && TREE_CODE (decl) == FUNCTION_DECL
5154 && arm_function_in_section_p (decl, current_function_section ()))
5155 return false;
5157 if (lookup_attribute ("long_call", attrs))
5158 return true;
5160 return TARGET_LONG_CALLS;
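/* Editorial example (not part of the original source).  Putting rules
   a-f above together:

       void slow_path (void) __attribute__((long_call));   // rule a
       void fast_path (void) __attribute__((short_call));  // rule d
       static void near (void) { }

   With -mlong-calls, a call to an ordinary external function uses the
   long-call sequence (rule c), but a call to NEAR, defined in the same
   section as the caller, still uses a plain BL (rule f).  */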
5163 /* Return nonzero if it is ok to make a tail-call to DECL. */
5164 static bool
5165 arm_function_ok_for_sibcall (tree decl, tree exp)
5167 unsigned long func_type;
5169 if (cfun->machine->sibcall_blocked)
5170 return false;
5172 /* Never tailcall something for which we have no decl, or if we
5173 are generating code for Thumb-1. */
5174 if (decl == NULL || TARGET_THUMB1)
5175 return false;
5177 /* The PIC register is live on entry to VxWorks PLT entries, so we
5178 must make the call before restoring the PIC register. */
5179 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5180 return false;
5182 /* Cannot tail-call to long calls, since these are out of range of
5183 a branch instruction. */
5184 if (arm_is_long_call_p (decl))
5185 return false;
5187 /* If we are interworking and the function is not declared static
5188 then we can't tail-call it unless we know that it exists in this
5189 compilation unit (since it might be a Thumb routine). */
5190 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5191 return false;
5193 func_type = arm_current_func_type ();
5194 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5195 if (IS_INTERRUPT (func_type))
5196 return false;
5198 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5200 /* Check that the return value locations are the same. For
5201 example that we aren't returning a value from the sibling in
5202 a VFP register but then need to transfer it to a core
5203 register. */
5204 rtx a, b;
5206 a = arm_function_value (TREE_TYPE (exp), decl, false);
5207 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5208 cfun->decl, false);
5209 if (!rtx_equal_p (a, b))
5210 return false;
5213 /* Never tailcall if function may be called with a misaligned SP. */
5214 if (IS_STACKALIGN (func_type))
5215 return false;
5217 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5218 references should become a NOP. Don't convert such calls into
5219 sibling calls. */
5220 if (TARGET_AAPCS_BASED
5221 && arm_abi == ARM_ABI_AAPCS
5222 && DECL_WEAK (decl))
5223 return false;
5225 /* Everything else is ok. */
5226 return true;
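/* Editorial example (not part of the original source).  The return-value
   check above is what stops, say, a base-PCS function from tail-calling a
   VFP-PCS one:

       float callee (float) __attribute__((pcs ("aapcs-vfp")));
       float caller (float x) { return callee (x); }  // built with the
                                                      // base PCS

   CALLEE leaves its result in s0 while CALLER must return it in r0, so
   the two locations compare unequal and a normal call (plus a move) is
   emitted instead of a sibcall.  */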
5230 /* Addressing mode support functions. */
5232 /* Return nonzero if X is a legitimate immediate operand when compiling
5233 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5235 legitimate_pic_operand_p (rtx x)
5237 if (GET_CODE (x) == SYMBOL_REF
5238 || (GET_CODE (x) == CONST
5239 && GET_CODE (XEXP (x, 0)) == PLUS
5240 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5241 return 0;
5243 return 1;
5246 /* Record that the current function needs a PIC register. Initialize
5247 cfun->machine->pic_reg if we have not already done so. */
5249 static void
5250 require_pic_register (void)
5252 /* A lot of the logic here is made obscure by the fact that this
5253 routine gets called as part of the rtx cost estimation process.
5254 We don't want those calls to affect any assumptions about the real
5255 function; and further, we can't call entry_of_function() until we
5256 start the real expansion process. */
5257 if (!crtl->uses_pic_offset_table)
5259 gcc_assert (can_create_pseudo_p ());
5260 if (arm_pic_register != INVALID_REGNUM)
5262 if (!cfun->machine->pic_reg)
5263 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5265 /* Play games to avoid marking the function as needing pic
5266 if we are being called as part of the cost-estimation
5267 process. */
5268 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5269 crtl->uses_pic_offset_table = 1;
5271 else
5273 rtx seq, insn;
5275 if (!cfun->machine->pic_reg)
5276 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5278 /* Play games to avoid marking the function as needing pic
5279 if we are being called as part of the cost-estimation
5280 process. */
5281 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5283 crtl->uses_pic_offset_table = 1;
5284 start_sequence ();
5286 arm_load_pic_register (0UL);
5288 seq = get_insns ();
5289 end_sequence ();
5291 for (insn = seq; insn; insn = NEXT_INSN (insn))
5292 if (INSN_P (insn))
5293 INSN_LOCATOR (insn) = prologue_locator;
5295 /* We can be called during expansion of PHI nodes, where
5296 we can't yet emit instructions directly in the final
5297 insn stream. Queue the insns on the entry edge, they will
5298 be committed after everything else is expanded. */
5299 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5306 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5308 if (GET_CODE (orig) == SYMBOL_REF
5309 || GET_CODE (orig) == LABEL_REF)
5311 rtx insn;
5313 if (reg == 0)
5315 gcc_assert (can_create_pseudo_p ());
5316 reg = gen_reg_rtx (Pmode);
5319 /* VxWorks does not impose a fixed gap between segments; the run-time
5320 gap can be different from the object-file gap. We therefore can't
5321 use GOTOFF unless we are absolutely sure that the symbol is in the
5322 same segment as the GOT. Unfortunately, the flexibility of linker
5323 scripts means that we can't be sure of that in general, so assume
5324 that GOTOFF is never valid on VxWorks. */
5325 if ((GET_CODE (orig) == LABEL_REF
5326 || (GET_CODE (orig) == SYMBOL_REF &&
5327 SYMBOL_REF_LOCAL_P (orig)))
5328 && NEED_GOT_RELOC
5329 && !TARGET_VXWORKS_RTP)
5330 insn = arm_pic_static_addr (orig, reg);
5331 else
5333 rtx pat;
5334 rtx mem;
5336 /* If this function doesn't have a pic register, create one now. */
5337 require_pic_register ();
5339 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5341 /* Make the MEM as close to a constant as possible. */
5342 mem = SET_SRC (pat);
5343 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5344 MEM_READONLY_P (mem) = 1;
5345 MEM_NOTRAP_P (mem) = 1;
5347 insn = emit_insn (pat);
5350 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5351 by loop. */
5352 set_unique_reg_note (insn, REG_EQUAL, orig);
5354 return reg;
5356 else if (GET_CODE (orig) == CONST)
5358 rtx base, offset;
5360 if (GET_CODE (XEXP (orig, 0)) == PLUS
5361 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5362 return orig;
5364 /* Handle the case where we have: const (UNSPEC_TLS). */
5365 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5366 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5367 return orig;
5369 /* Handle the case where we have:
5370 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5371 CONST_INT. */
5372 if (GET_CODE (XEXP (orig, 0)) == PLUS
5373 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5374 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5376 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5377 return orig;
5380 if (reg == 0)
5382 gcc_assert (can_create_pseudo_p ());
5383 reg = gen_reg_rtx (Pmode);
5386 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5388 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5389 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5390 base == reg ? 0 : reg);
5392 if (GET_CODE (offset) == CONST_INT)
5394 /* The base register doesn't really matter, we only want to
5395 test the index for the appropriate mode. */
5396 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5398 gcc_assert (can_create_pseudo_p ());
5399 offset = force_reg (Pmode, offset);
5402 if (GET_CODE (offset) == CONST_INT)
5403 return plus_constant (Pmode, base, INTVAL (offset));
5406 if (GET_MODE_SIZE (mode) > 4
5407 && (GET_MODE_CLASS (mode) == MODE_INT
5408 || TARGET_SOFT_FLOAT))
5410 emit_insn (gen_addsi3 (reg, base, offset));
5411 return reg;
5414 return gen_rtx_PLUS (Pmode, base, offset);
5417 return orig;
5421 /* Find a spare register to use during the prolog of a function. */
5423 static int
5424 thumb_find_work_register (unsigned long pushed_regs_mask)
5426 int reg;
5428 /* Check the argument registers first as these are call-used. The
5429 register allocation order means that sometimes r3 might be used
5430 but earlier argument registers might not, so check them all. */
5431 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5432 if (!df_regs_ever_live_p (reg))
5433 return reg;
5435 /* Before going on to check the call-saved registers we can try a couple
5436 more ways of deducing that r3 is available. The first is when we are
5437 pushing anonymous arguments onto the stack and we have less than 4
5438 registers worth of fixed arguments(*). In this case r3 will be part of
5439 the variable argument list and so we can be sure that it will be
5440 pushed right at the start of the function. Hence it will be available
5441 for the rest of the prologue.
5442 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5443 if (cfun->machine->uses_anonymous_args
5444 && crtl->args.pretend_args_size > 0)
5445 return LAST_ARG_REGNUM;
5447 /* The other case is when we have fixed arguments but less than 4 registers
5448 worth. In this case r3 might be used in the body of the function, but
5449 it is not being used to convey an argument into the function. In theory
5450 we could just check crtl->args.size to see how many bytes are
5451 being passed in argument registers, but it seems that it is unreliable.
5452 Sometimes it will have the value 0 when in fact arguments are being
5453 passed. (See testcase execute/20021111-1.c for an example). So we also
5454 check the args_info.nregs field as well. The problem with this field is
5455 that it makes no allowances for arguments that are passed to the
5456 function but which are not used. Hence we could miss an opportunity
5457 when a function has an unused argument in r3. But it is better to be
5458 safe than to be sorry. */
5459 if (! cfun->machine->uses_anonymous_args
5460 && crtl->args.size >= 0
5461 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5462 && crtl->args.info.nregs < 4)
5463 return LAST_ARG_REGNUM;
5465 /* Otherwise look for a call-saved register that is going to be pushed. */
5466 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5467 if (pushed_regs_mask & (1 << reg))
5468 return reg;
5470 if (TARGET_THUMB2)
5472 /* Thumb-2 can use high regs. */
5473 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5474 if (pushed_regs_mask & (1 << reg))
5475 return reg;
5477 /* Something went wrong - thumb_compute_save_reg_mask()
5478 should have arranged for a suitable register to be pushed. */
5479 gcc_unreachable ();
5482 static GTY(()) int pic_labelno;
5484 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5485 low register. */
5487 void
5488 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5490 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5492 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5493 return;
5495 gcc_assert (flag_pic);
5497 pic_reg = cfun->machine->pic_reg;
5498 if (TARGET_VXWORKS_RTP)
5500 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5501 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5502 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5504 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5506 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5507 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5509 else
5511 /* We use an UNSPEC rather than a LABEL_REF because this label
5512 never appears in the code stream. */
5514 labelno = GEN_INT (pic_labelno++);
5515 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5516 l1 = gen_rtx_CONST (VOIDmode, l1);
5518 /* On the ARM the PC register contains 'dot + 8' at the time of the
5519 addition, on the Thumb it is 'dot + 4'. */
5520 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5521 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5522 UNSPEC_GOTSYM_OFF);
5523 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5525 if (TARGET_32BIT)
5527 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5529 else /* TARGET_THUMB1 */
5531 if (arm_pic_register != INVALID_REGNUM
5532 && REGNO (pic_reg) > LAST_LO_REGNUM)
5534 /* We will have pushed the pic register, so we should always be
5535 able to find a work register. */
5536 pic_tmp = gen_rtx_REG (SImode,
5537 thumb_find_work_register (saved_regs));
5538 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5539 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5540 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5542 else
5543 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5547 /* Need to emit this whether or not we obey regdecls,
5548 since setjmp/longjmp can cause life info to screw up. */
5549 emit_use (pic_reg);
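/* For illustration only: in the common (non-VxWorks) ARM-state case the
   code above boils down to something like

       ldr     rPIC, .LCn
   .LPICm:
       add     rPIC, pc, rPIC
       ...
   .LCn:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)

   leaving the PIC register pointing at the GOT.  The exact code depends
   on the architecture level (a movw/movt pair may replace the literal
   load).  */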
5552 /* Generate code to load the address of a static var when flag_pic is set. */
5553 static rtx
5554 arm_pic_static_addr (rtx orig, rtx reg)
5556 rtx l1, labelno, offset_rtx, insn;
5558 gcc_assert (flag_pic);
5560 /* We use an UNSPEC rather than a LABEL_REF because this label
5561 never appears in the code stream. */
5562 labelno = GEN_INT (pic_labelno++);
5563 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5564 l1 = gen_rtx_CONST (VOIDmode, l1);
5566 /* On the ARM the PC register contains 'dot + 8' at the time of the
5567 addition, on the Thumb it is 'dot + 4'. */
5568 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5569 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5570 UNSPEC_SYMBOL_OFFSET);
5571 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5573 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5574 return insn;
5577 /* Return nonzero if X is valid as an ARM state addressing register. */
5578 static int
5579 arm_address_register_rtx_p (rtx x, int strict_p)
5581 int regno;
5583 if (GET_CODE (x) != REG)
5584 return 0;
5586 regno = REGNO (x);
5588 if (strict_p)
5589 return ARM_REGNO_OK_FOR_BASE_P (regno);
5591 return (regno <= LAST_ARM_REGNUM
5592 || regno >= FIRST_PSEUDO_REGISTER
5593 || regno == FRAME_POINTER_REGNUM
5594 || regno == ARG_POINTER_REGNUM);
5597 /* Return TRUE if this rtx is the difference of a symbol and a label,
5598 and will reduce to a PC-relative relocation in the object file.
5599 Expressions like this can be left alone when generating PIC, rather
5600 than forced through the GOT. */
5601 static int
5602 pcrel_constant_p (rtx x)
5604 if (GET_CODE (x) == MINUS)
5605 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5607 return FALSE;
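/* For example, an expression of the form
     (minus (symbol_ref ("sym")) (label_ref L))
   satisfies this test and can be resolved with a PC-relative
   relocation.  */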
5610 /* Return true if X will surely end up in an index register after next
5611 splitting pass. */
5612 static bool
5613 will_be_in_index_register (const_rtx x)
5615 /* arm.md: calculate_pic_address will split this into a register. */
5616 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5619 /* Return nonzero if X is a valid ARM state address operand. */
5621 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5622 int strict_p)
5624 bool use_ldrd;
5625 enum rtx_code code = GET_CODE (x);
5627 if (arm_address_register_rtx_p (x, strict_p))
5628 return 1;
5630 use_ldrd = (TARGET_LDRD
5631 && (mode == DImode
5632 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5634 if (code == POST_INC || code == PRE_DEC
5635 || ((code == PRE_INC || code == POST_DEC)
5636 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5637 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
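/* I.e. an address such as (post_inc:SI (reg rN)), the form behind
   `ldr rX, [rN], #4'.  */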
5639 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5640 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5641 && GET_CODE (XEXP (x, 1)) == PLUS
5642 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5644 rtx addend = XEXP (XEXP (x, 1), 1);
5646 /* Don't allow ldrd post increment by register because it's hard
5647 to fixup invalid register choices. */
5648 if (use_ldrd
5649 && GET_CODE (x) == POST_MODIFY
5650 && GET_CODE (addend) == REG)
5651 return 0;
5653 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5654 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5657 /* After reload constants split into minipools will have addresses
5658 from a LABEL_REF. */
5659 else if (reload_completed
5660 && (code == LABEL_REF
5661 || (code == CONST
5662 && GET_CODE (XEXP (x, 0)) == PLUS
5663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5664 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5665 return 1;
5667 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5668 return 0;
5670 else if (code == PLUS)
5672 rtx xop0 = XEXP (x, 0);
5673 rtx xop1 = XEXP (x, 1);
5675 return ((arm_address_register_rtx_p (xop0, strict_p)
5676 && ((GET_CODE(xop1) == CONST_INT
5677 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5678 || (!strict_p && will_be_in_index_register (xop1))))
5679 || (arm_address_register_rtx_p (xop1, strict_p)
5680 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5683 #if 0
5684 /* Reload currently can't handle MINUS, so disable this for now */
5685 else if (GET_CODE (x) == MINUS)
5687 rtx xop0 = XEXP (x, 0);
5688 rtx xop1 = XEXP (x, 1);
5690 return (arm_address_register_rtx_p (xop0, strict_p)
5691 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5693 #endif
5695 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5696 && code == SYMBOL_REF
5697 && CONSTANT_POOL_ADDRESS_P (x)
5698 && ! (flag_pic
5699 && symbol_mentioned_p (get_pool_constant (x))
5700 && ! pcrel_constant_p (get_pool_constant (x))))
5701 return 1;
5703 return 0;
5706 /* Return nonzero if X is a valid Thumb-2 address operand. */
5707 static int
5708 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5710 bool use_ldrd;
5711 enum rtx_code code = GET_CODE (x);
5713 if (arm_address_register_rtx_p (x, strict_p))
5714 return 1;
5716 use_ldrd = (TARGET_LDRD
5717 && (mode == DImode
5718 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5720 if (code == POST_INC || code == PRE_DEC
5721 || ((code == PRE_INC || code == POST_DEC)
5722 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5723 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5725 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5726 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5727 && GET_CODE (XEXP (x, 1)) == PLUS
5728 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5730 /* Thumb-2 only has autoincrement by constant. */
5731 rtx addend = XEXP (XEXP (x, 1), 1);
5732 HOST_WIDE_INT offset;
5734 if (GET_CODE (addend) != CONST_INT)
5735 return 0;
5737 offset = INTVAL(addend);
5738 if (GET_MODE_SIZE (mode) <= 4)
5739 return (offset > -256 && offset < 256);
5741 return (use_ldrd && offset > -1024 && offset < 1024
5742 && (offset & 3) == 0);
5745 /* After reload constants split into minipools will have addresses
5746 from a LABEL_REF. */
5747 else if (reload_completed
5748 && (code == LABEL_REF
5749 || (code == CONST
5750 && GET_CODE (XEXP (x, 0)) == PLUS
5751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5752 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5753 return 1;
5755 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5756 return 0;
5758 else if (code == PLUS)
5760 rtx xop0 = XEXP (x, 0);
5761 rtx xop1 = XEXP (x, 1);
5763 return ((arm_address_register_rtx_p (xop0, strict_p)
5764 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5765 || (!strict_p && will_be_in_index_register (xop1))))
5766 || (arm_address_register_rtx_p (xop1, strict_p)
5767 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5770 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5771 && code == SYMBOL_REF
5772 && CONSTANT_POOL_ADDRESS_P (x)
5773 && ! (flag_pic
5774 && symbol_mentioned_p (get_pool_constant (x))
5775 && ! pcrel_constant_p (get_pool_constant (x))))
5776 return 1;
5778 return 0;
5781 /* Return nonzero if INDEX is valid for an address index operand in
5782 ARM state. */
5783 static int
5784 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5785 int strict_p)
5787 HOST_WIDE_INT range;
5788 enum rtx_code code = GET_CODE (index);
5790 /* Standard coprocessor addressing modes. */
5791 if (TARGET_HARD_FLOAT
5792 && TARGET_VFP
5793 && (mode == SFmode || mode == DFmode))
5794 return (code == CONST_INT && INTVAL (index) < 1024
5795 && INTVAL (index) > -1024
5796 && (INTVAL (index) & 3) == 0);
5798 /* For quad modes, we restrict the constant offset to be slightly less
5799 than what the instruction format permits. We do this because for
5800 quad mode moves, we will actually decompose them into two separate
5801 double-mode reads or writes. INDEX must therefore be a valid
5802 (double-mode) offset and so should INDEX+8. */
5803 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5804 return (code == CONST_INT
5805 && INTVAL (index) < 1016
5806 && INTVAL (index) > -1024
5807 && (INTVAL (index) & 3) == 0);
5809 /* We have no such constraint on double mode offsets, so we permit the
5810 full range of the instruction format. */
5811 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5812 return (code == CONST_INT
5813 && INTVAL (index) < 1024
5814 && INTVAL (index) > -1024
5815 && (INTVAL (index) & 3) == 0);
5817 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5818 return (code == CONST_INT
5819 && INTVAL (index) < 1024
5820 && INTVAL (index) > -1024
5821 && (INTVAL (index) & 3) == 0);
5823 if (arm_address_register_rtx_p (index, strict_p)
5824 && (GET_MODE_SIZE (mode) <= 4))
5825 return 1;
5827 if (mode == DImode || mode == DFmode)
5829 if (code == CONST_INT)
5831 HOST_WIDE_INT val = INTVAL (index);
5833 if (TARGET_LDRD)
5834 return val > -256 && val < 256;
5835 else
5836 return val > -4096 && val < 4092;
5839 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5842 if (GET_MODE_SIZE (mode) <= 4
5843 && ! (arm_arch4
5844 && (mode == HImode
5845 || mode == HFmode
5846 || (mode == QImode && outer == SIGN_EXTEND))))
5848 if (code == MULT)
5850 rtx xiop0 = XEXP (index, 0);
5851 rtx xiop1 = XEXP (index, 1);
5853 return ((arm_address_register_rtx_p (xiop0, strict_p)
5854 && power_of_two_operand (xiop1, SImode))
5855 || (arm_address_register_rtx_p (xiop1, strict_p)
5856 && power_of_two_operand (xiop0, SImode)));
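/* A scaled index such as [rN, rM, lsl #2] normally reaches this point
   as (plus (reg rN) (mult (reg rM) (const_int 4))), since shifts inside
   addresses are canonicalized to MULT by a power of two; hence the MULT
   form handled here.  */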
5858 else if (code == LSHIFTRT || code == ASHIFTRT
5859 || code == ASHIFT || code == ROTATERT)
5861 rtx op = XEXP (index, 1);
5863 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5864 && GET_CODE (op) == CONST_INT
5865 && INTVAL (op) > 0
5866 && INTVAL (op) <= 31);
5870 /* For ARM v4 we may be doing a sign-extend operation during the
5871 load. */
5872 if (arm_arch4)
5874 if (mode == HImode
5875 || mode == HFmode
5876 || (outer == SIGN_EXTEND && mode == QImode))
5877 range = 256;
5878 else
5879 range = 4096;
5881 else
5882 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5884 return (code == CONST_INT
5885 && INTVAL (index) < range
5886 && INTVAL (index) > -range);
5889 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5890 index operand, i.e. 1, 2, 4 or 8. */
5891 static bool
5892 thumb2_index_mul_operand (rtx op)
5894 HOST_WIDE_INT val;
5896 if (GET_CODE(op) != CONST_INT)
5897 return false;
5899 val = INTVAL(op);
5900 return (val == 1 || val == 2 || val == 4 || val == 8);
5903 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5904 static int
5905 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5907 enum rtx_code code = GET_CODE (index);
5909 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5910 /* Standard coprocessor addressing modes. */
5911 if (TARGET_HARD_FLOAT
5912 && TARGET_VFP
5913 && (mode == SFmode || mode == DFmode))
5914 return (code == CONST_INT && INTVAL (index) < 1024
5915 /* Thumb-2 allows only a > -256 index range for its core register
5916 load/stores. Since we allow SF/DF in core registers, we have
5917 to use the intersection between -256~4096 (core) and -1024~1024
5918 (coprocessor). */
5919 && INTVAL (index) > -256
5920 && (INTVAL (index) & 3) == 0);
5922 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5924 /* For DImode assume values will usually live in core regs
5925 and only allow LDRD addressing modes. */
5926 if (!TARGET_LDRD || mode != DImode)
5927 return (code == CONST_INT
5928 && INTVAL (index) < 1024
5929 && INTVAL (index) > -1024
5930 && (INTVAL (index) & 3) == 0);
5933 /* For quad modes, we restrict the constant offset to be slightly less
5934 than what the instruction format permits. We do this because for
5935 quad mode moves, we will actually decompose them into two separate
5936 double-mode reads or writes. INDEX must therefore be a valid
5937 (double-mode) offset and so should INDEX+8. */
5938 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5939 return (code == CONST_INT
5940 && INTVAL (index) < 1016
5941 && INTVAL (index) > -1024
5942 && (INTVAL (index) & 3) == 0);
5944 /* We have no such constraint on double mode offsets, so we permit the
5945 full range of the instruction format. */
5946 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5947 return (code == CONST_INT
5948 && INTVAL (index) < 1024
5949 && INTVAL (index) > -1024
5950 && (INTVAL (index) & 3) == 0);
5952 if (arm_address_register_rtx_p (index, strict_p)
5953 && (GET_MODE_SIZE (mode) <= 4))
5954 return 1;
5956 if (mode == DImode || mode == DFmode)
5958 if (code == CONST_INT)
5960 HOST_WIDE_INT val = INTVAL (index);
5961 /* ??? Can we assume ldrd for thumb2? */
5962 /* Thumb-2 ldrd only has reg+const addressing modes. */
5963 /* ldrd supports offsets of +-1020.
5964 However the ldr fallback does not. */
5965 return val > -256 && val < 256 && (val & 3) == 0;
5967 else
5968 return 0;
5971 if (code == MULT)
5973 rtx xiop0 = XEXP (index, 0);
5974 rtx xiop1 = XEXP (index, 1);
5976 return ((arm_address_register_rtx_p (xiop0, strict_p)
5977 && thumb2_index_mul_operand (xiop1))
5978 || (arm_address_register_rtx_p (xiop1, strict_p)
5979 && thumb2_index_mul_operand (xiop0)));
5981 else if (code == ASHIFT)
5983 rtx op = XEXP (index, 1);
5985 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5986 && GET_CODE (op) == CONST_INT
5987 && INTVAL (op) > 0
5988 && INTVAL (op) <= 3);
5991 return (code == CONST_INT
5992 && INTVAL (index) < 4096
5993 && INTVAL (index) > -256);
5996 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5997 static int
5998 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6000 int regno;
6002 if (GET_CODE (x) != REG)
6003 return 0;
6005 regno = REGNO (x);
6007 if (strict_p)
6008 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6010 return (regno <= LAST_LO_REGNUM
6011 || regno > LAST_VIRTUAL_REGISTER
6012 || regno == FRAME_POINTER_REGNUM
6013 || (GET_MODE_SIZE (mode) >= 4
6014 && (regno == STACK_POINTER_REGNUM
6015 || regno >= FIRST_PSEUDO_REGISTER
6016 || x == hard_frame_pointer_rtx
6017 || x == arg_pointer_rtx)));
6020 /* Return nonzero if x is a legitimate index register. This is the case
6021 for any base register that can access a QImode object. */
6022 inline static int
6023 thumb1_index_register_rtx_p (rtx x, int strict_p)
6025 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6028 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6030 The AP may be eliminated to either the SP or the FP, so we use the
6031 least common denominator, e.g. SImode, and offsets from 0 to 64.
6033 ??? Verify whether the above is the right approach.
6035 ??? Also, the FP may be eliminated to the SP, so perhaps that
6036 needs special handling also.
6038 ??? Look at how the mips16 port solves this problem. It probably uses
6039 better ways to solve some of these problems.
6041 Although it is not incorrect, we don't accept QImode and HImode
6042 addresses based on the frame pointer or arg pointer until the
6043 reload pass starts. This is so that eliminating such addresses
6044 into stack based ones won't produce impossible code. */
6046 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6048 /* ??? Not clear if this is right. Experiment. */
6049 if (GET_MODE_SIZE (mode) < 4
6050 && !(reload_in_progress || reload_completed)
6051 && (reg_mentioned_p (frame_pointer_rtx, x)
6052 || reg_mentioned_p (arg_pointer_rtx, x)
6053 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6054 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6055 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6056 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6057 return 0;
6059 /* Accept any base register. SP only in SImode or larger. */
6060 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6061 return 1;
6063 /* This is PC relative data before arm_reorg runs. */
6064 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6065 && GET_CODE (x) == SYMBOL_REF
6066 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6067 return 1;
6069 /* This is PC relative data after arm_reorg runs. */
6070 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6071 && reload_completed
6072 && (GET_CODE (x) == LABEL_REF
6073 || (GET_CODE (x) == CONST
6074 && GET_CODE (XEXP (x, 0)) == PLUS
6075 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6076 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6077 return 1;
6079 /* Post-inc indexing only supported for SImode and larger. */
6080 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6081 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6082 return 1;
6084 else if (GET_CODE (x) == PLUS)
6086 /* REG+REG address can be any two index registers. */
6087 /* We disallow FRAME+REG addressing since we know that FRAME
6088 will be replaced with STACK, and SP relative addressing only
6089 permits SP+OFFSET. */
6090 if (GET_MODE_SIZE (mode) <= 4
6091 && XEXP (x, 0) != frame_pointer_rtx
6092 && XEXP (x, 1) != frame_pointer_rtx
6093 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6094 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6095 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6096 return 1;
6098 /* REG+const has 5-7 bit offset for non-SP registers. */
6099 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6100 || XEXP (x, 0) == arg_pointer_rtx)
6101 && GET_CODE (XEXP (x, 1)) == CONST_INT
6102 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6103 return 1;
6105 /* REG+const has 10-bit offset for SP, but only SImode and
6106 larger is supported. */
6107 /* ??? Should probably check for DI/DFmode overflow here
6108 just like GO_IF_LEGITIMATE_OFFSET does. */
6109 else if (GET_CODE (XEXP (x, 0)) == REG
6110 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6111 && GET_MODE_SIZE (mode) >= 4
6112 && GET_CODE (XEXP (x, 1)) == CONST_INT
6113 && INTVAL (XEXP (x, 1)) >= 0
6114 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6115 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6116 return 1;
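/* That is, up to `ldr rX, [sp, #1020]' for SImode; the offset plus the
   access size must stay within the 1024-byte SP-relative window.  */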
6118 else if (GET_CODE (XEXP (x, 0)) == REG
6119 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6120 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6121 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6122 && REGNO (XEXP (x, 0))
6123 <= LAST_VIRTUAL_POINTER_REGISTER))
6124 && GET_MODE_SIZE (mode) >= 4
6125 && GET_CODE (XEXP (x, 1)) == CONST_INT
6126 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6127 return 1;
6130 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6131 && GET_MODE_SIZE (mode) == 4
6132 && GET_CODE (x) == SYMBOL_REF
6133 && CONSTANT_POOL_ADDRESS_P (x)
6134 && ! (flag_pic
6135 && symbol_mentioned_p (get_pool_constant (x))
6136 && ! pcrel_constant_p (get_pool_constant (x))))
6137 return 1;
6139 return 0;
6142 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6143 instruction of mode MODE. */
6145 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6147 switch (GET_MODE_SIZE (mode))
6149 case 1:
6150 return val >= 0 && val < 32;
6152 case 2:
6153 return val >= 0 && val < 64 && (val & 1) == 0;
6155 default:
6156 return (val >= 0
6157 && (val + GET_MODE_SIZE (mode)) <= 128
6158 && (val & 3) == 0);
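/* Concretely: QImode accepts offsets 0..31, HImode 0..62 in steps of 2,
   and word-sized accesses 0..124 in steps of 4 (larger modes must leave
   room for the whole access), matching the 5-bit scaled immediates of
   the Thumb-1 ldrb/ldrh/ldr encodings.  */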
6162 bool
6163 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6165 if (TARGET_ARM)
6166 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6167 else if (TARGET_THUMB2)
6168 return thumb2_legitimate_address_p (mode, x, strict_p);
6169 else /* if (TARGET_THUMB1) */
6170 return thumb1_legitimate_address_p (mode, x, strict_p);
6173 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6175 Given an rtx X being reloaded into a reg required to be
6176 in class CLASS, return the class of reg to actually use.
6177 In general this is just CLASS, but for the Thumb core registers and
6178 immediate constants we prefer a LO_REGS class or a subset. */
6180 static reg_class_t
6181 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6183 if (TARGET_32BIT)
6184 return rclass;
6185 else
6187 if (rclass == GENERAL_REGS
6188 || rclass == HI_REGS
6189 || rclass == NO_REGS
6190 || rclass == STACK_REG)
6191 return LO_REGS;
6192 else
6193 return rclass;
6197 /* Build the SYMBOL_REF for __tls_get_addr. */
6199 static GTY(()) rtx tls_get_addr_libfunc;
6201 static rtx
6202 get_tls_get_addr (void)
6204 if (!tls_get_addr_libfunc)
6205 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6206 return tls_get_addr_libfunc;
6209 static rtx
6210 arm_load_tp (rtx target)
6212 if (!target)
6213 target = gen_reg_rtx (SImode);
6215 if (TARGET_HARD_TP)
6217 /* Can return in any reg. */
6218 emit_insn (gen_load_tp_hard (target));
6220 else
6222 /* Always returned in r0. Immediately copy the result into a pseudo,
6223 otherwise other uses of r0 (e.g. setting up function arguments) may
6224 clobber the value. */
6226 rtx tmp;
6228 emit_insn (gen_load_tp_soft ());
6230 tmp = gen_rtx_REG (SImode, 0);
6231 emit_move_insn (target, tmp);
6233 return target;
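/* Illustration: with a hardware thread register this is a single
   `mrc p15, 0, <target>, c13, c0, 3' (TPIDRURO); with a software thread
   pointer it is a call to __aeabi_read_tp, whose result is in r0 as
   noted above.  */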
6236 static rtx
6237 load_tls_operand (rtx x, rtx reg)
6239 rtx tmp;
6241 if (reg == NULL_RTX)
6242 reg = gen_reg_rtx (SImode);
6244 tmp = gen_rtx_CONST (SImode, x);
6246 emit_move_insn (reg, tmp);
6248 return reg;
6251 static rtx
6252 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6254 rtx insns, label, labelno, sum;
6256 gcc_assert (reloc != TLS_DESCSEQ);
6257 start_sequence ();
6259 labelno = GEN_INT (pic_labelno++);
6260 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6261 label = gen_rtx_CONST (VOIDmode, label);
6263 sum = gen_rtx_UNSPEC (Pmode,
6264 gen_rtvec (4, x, GEN_INT (reloc), label,
6265 GEN_INT (TARGET_ARM ? 8 : 4)),
6266 UNSPEC_TLS);
6267 reg = load_tls_operand (sum, reg);
6269 if (TARGET_ARM)
6270 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6271 else
6272 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6274 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6275 LCT_PURE, /* LCT_CONST? */
6276 Pmode, 1, reg, Pmode);
6278 insns = get_insns ();
6279 end_sequence ();
6281 return insns;
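/* For illustration, in ARM state the global-dynamic variant of the
   sequence built above is roughly

       ldr     r0, .Ln          @ sym(tlsgd), PC-relative adjustment
   .LPICm:
       add     r0, pc, r0
       bl      __tls_get_addr

   with r0 holding the address of the variable on return.  */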
6284 static rtx
6285 arm_tls_descseq_addr (rtx x, rtx reg)
6287 rtx labelno = GEN_INT (pic_labelno++);
6288 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6289 rtx sum = gen_rtx_UNSPEC (Pmode,
6290 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6291 gen_rtx_CONST (VOIDmode, label),
6292 GEN_INT (!TARGET_ARM)),
6293 UNSPEC_TLS);
6294 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6296 emit_insn (gen_tlscall (x, labelno));
6297 if (!reg)
6298 reg = gen_reg_rtx (SImode);
6299 else
6300 gcc_assert (REGNO (reg) != 0);
6302 emit_move_insn (reg, reg0);
6304 return reg;
6308 legitimize_tls_address (rtx x, rtx reg)
6310 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6311 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6313 switch (model)
6315 case TLS_MODEL_GLOBAL_DYNAMIC:
6316 if (TARGET_GNU2_TLS)
6318 reg = arm_tls_descseq_addr (x, reg);
6320 tp = arm_load_tp (NULL_RTX);
6322 dest = gen_rtx_PLUS (Pmode, tp, reg);
6324 else
6326 /* Original scheme */
6327 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6328 dest = gen_reg_rtx (Pmode);
6329 emit_libcall_block (insns, dest, ret, x);
6331 return dest;
6333 case TLS_MODEL_LOCAL_DYNAMIC:
6334 if (TARGET_GNU2_TLS)
6336 reg = arm_tls_descseq_addr (x, reg);
6338 tp = arm_load_tp (NULL_RTX);
6340 dest = gen_rtx_PLUS (Pmode, tp, reg);
6342 else
6344 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6346 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6347 share the LDM result with other LD model accesses. */
6348 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6349 UNSPEC_TLS);
6350 dest = gen_reg_rtx (Pmode);
6351 emit_libcall_block (insns, dest, ret, eqv);
6353 /* Load the addend. */
6354 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6355 GEN_INT (TLS_LDO32)),
6356 UNSPEC_TLS);
6357 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6358 dest = gen_rtx_PLUS (Pmode, dest, addend);
6360 return dest;
6362 case TLS_MODEL_INITIAL_EXEC:
6363 labelno = GEN_INT (pic_labelno++);
6364 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6365 label = gen_rtx_CONST (VOIDmode, label);
6366 sum = gen_rtx_UNSPEC (Pmode,
6367 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6368 GEN_INT (TARGET_ARM ? 8 : 4)),
6369 UNSPEC_TLS);
6370 reg = load_tls_operand (sum, reg);
6372 if (TARGET_ARM)
6373 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6374 else if (TARGET_THUMB2)
6375 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6376 else
6378 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6379 emit_move_insn (reg, gen_const_mem (SImode, reg));
6382 tp = arm_load_tp (NULL_RTX);
6384 return gen_rtx_PLUS (Pmode, tp, reg);
6386 case TLS_MODEL_LOCAL_EXEC:
6387 tp = arm_load_tp (NULL_RTX);
6389 reg = gen_rtx_UNSPEC (Pmode,
6390 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6391 UNSPEC_TLS);
6392 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6394 return gen_rtx_PLUS (Pmode, tp, reg);
6396 default:
6397 abort ();
6401 /* Try machine-dependent ways of modifying an illegitimate address
6402 to be legitimate. If we find one, return the new, valid address. */
6404 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6406 if (!TARGET_ARM)
6408 /* TODO: legitimize_address for Thumb2. */
6409 if (TARGET_THUMB2)
6410 return x;
6411 return thumb_legitimize_address (x, orig_x, mode);
6414 if (arm_tls_symbol_p (x))
6415 return legitimize_tls_address (x, NULL_RTX);
6417 if (GET_CODE (x) == PLUS)
6419 rtx xop0 = XEXP (x, 0);
6420 rtx xop1 = XEXP (x, 1);
6422 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6423 xop0 = force_reg (SImode, xop0);
6425 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6426 xop1 = force_reg (SImode, xop1);
6428 if (ARM_BASE_REGISTER_RTX_P (xop0)
6429 && GET_CODE (xop1) == CONST_INT)
6431 HOST_WIDE_INT n, low_n;
6432 rtx base_reg, val;
6433 n = INTVAL (xop1);
6435 /* VFP addressing modes actually allow greater offsets, but for
6436 now we just stick with the lowest common denominator. */
6437 if (mode == DImode
6438 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6440 low_n = n & 0x0f;
6441 n &= ~0x0f;
6442 if (low_n > 4)
6444 n += 16;
6445 low_n -= 16;
6448 else
6450 low_n = ((mode) == TImode ? 0
6451 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6452 n -= low_n;
6455 base_reg = gen_reg_rtx (SImode);
6456 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6457 emit_move_insn (base_reg, val);
6458 x = plus_constant (Pmode, base_reg, low_n);
6460 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6461 x = gen_rtx_PLUS (SImode, xop0, xop1);
6464 /* XXX We don't allow MINUS any more -- see comment in
6465 arm_legitimate_address_outer_p (). */
6466 else if (GET_CODE (x) == MINUS)
6468 rtx xop0 = XEXP (x, 0);
6469 rtx xop1 = XEXP (x, 1);
6471 if (CONSTANT_P (xop0))
6472 xop0 = force_reg (SImode, xop0);
6474 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6475 xop1 = force_reg (SImode, xop1);
6477 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6478 x = gen_rtx_MINUS (SImode, xop0, xop1);
6481 /* Make sure to take full advantage of the pre-indexed addressing mode
6482 with absolute addresses which often allows for the base register to
6483 be factorized for multiple adjacent memory references, and it might
6484 even allow for the mini pool to be avoided entirely. */
6485 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6487 unsigned int bits;
6488 HOST_WIDE_INT mask, base, index;
6489 rtx base_reg;
6491 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6492 use an 8-bit index. So let's use a 12-bit index for SImode only and
6493 hope that arm_gen_constant will enable ldrb to use more bits. */
6494 bits = (mode == SImode) ? 12 : 8;
6495 mask = (1 << bits) - 1;
6496 base = INTVAL (x) & ~mask;
6497 index = INTVAL (x) & mask;
6498 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6500 /* It'll most probably be more efficient to generate the base
6501 with more bits set and use a negative index instead. */
6502 base |= mask;
6503 index -= mask;
6505 base_reg = force_reg (SImode, GEN_INT (base));
6506 x = plus_constant (Pmode, base_reg, index);
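/* Worked example (illustration): for a SImode access to the absolute
   address 0x12345 we get base = 0x12000 (only 2 bits set, so the
   negative-index trick above is not needed) and index = 0x345, i.e. the
   address is rebuilt as (reg holding 0x12000) + 0x345.  */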
6509 if (flag_pic)
6511 /* We need to find and carefully transform any SYMBOL and LABEL
6512 references; so go back to the original address expression. */
6513 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6515 if (new_x != orig_x)
6516 x = new_x;
6519 return x;
6523 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6524 to be legitimate. If we find one, return the new, valid address. */
6526 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6528 if (arm_tls_symbol_p (x))
6529 return legitimize_tls_address (x, NULL_RTX);
6531 if (GET_CODE (x) == PLUS
6532 && GET_CODE (XEXP (x, 1)) == CONST_INT
6533 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6534 || INTVAL (XEXP (x, 1)) < 0))
6536 rtx xop0 = XEXP (x, 0);
6537 rtx xop1 = XEXP (x, 1);
6538 HOST_WIDE_INT offset = INTVAL (xop1);
6540 /* Try and fold the offset into a biasing of the base register and
6541 then offsetting that. Don't do this when optimizing for space
6542 since it can cause too many CSEs. */
6543 if (optimize_size && offset >= 0
6544 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6546 HOST_WIDE_INT delta;
6548 if (offset >= 256)
6549 delta = offset - (256 - GET_MODE_SIZE (mode));
6550 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6551 delta = 31 * GET_MODE_SIZE (mode);
6552 else
6553 delta = offset & (~31 * GET_MODE_SIZE (mode));
6555 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6556 NULL_RTX);
6557 x = plus_constant (Pmode, xop0, delta);
6559 else if (offset < 0 && offset > -256)
6560 /* Small negative offsets are best done with a subtract before the
6561 dereference, since forcing these into a register normally takes two
6562 instructions. */
6563 x = force_operand (x, NULL_RTX);
6564 else
6566 /* For the remaining cases, force the constant into a register. */
6567 xop1 = force_reg (SImode, xop1);
6568 x = gen_rtx_PLUS (SImode, xop0, xop1);
6571 else if (GET_CODE (x) == PLUS
6572 && s_register_operand (XEXP (x, 1), SImode)
6573 && !s_register_operand (XEXP (x, 0), SImode))
6575 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6577 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6580 if (flag_pic)
6582 /* We need to find and carefully transform any SYMBOL and LABEL
6583 references; so go back to the original address expression. */
6584 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6586 if (new_x != orig_x)
6587 x = new_x;
6590 return x;
6593 bool
6594 arm_legitimize_reload_address (rtx *p,
6595 enum machine_mode mode,
6596 int opnum, int type,
6597 int ind_levels ATTRIBUTE_UNUSED)
6599 /* We must recognize output that we have already generated ourselves. */
6600 if (GET_CODE (*p) == PLUS
6601 && GET_CODE (XEXP (*p, 0)) == PLUS
6602 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6603 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6604 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6606 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6607 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6608 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6609 return true;
6612 if (GET_CODE (*p) == PLUS
6613 && GET_CODE (XEXP (*p, 0)) == REG
6614 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6615 /* If the base register is equivalent to a constant, let the generic
6616 code handle it. Otherwise we will run into problems if a future
6617 reload pass decides to rematerialize the constant. */
6618 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6619 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6621 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6622 HOST_WIDE_INT low, high;
6624 /* Detect coprocessor load/stores. */
6625 bool coproc_p = ((TARGET_HARD_FLOAT
6626 && TARGET_VFP
6627 && (mode == SFmode || mode == DFmode))
6628 || (TARGET_REALLY_IWMMXT
6629 && VALID_IWMMXT_REG_MODE (mode))
6630 || (TARGET_NEON
6631 && (VALID_NEON_DREG_MODE (mode)
6632 || VALID_NEON_QREG_MODE (mode))));
6634 /* For some conditions, bail out when lower two bits are unaligned. */
6635 if ((val & 0x3) != 0
6636 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6637 && (coproc_p
6638 /* For DI, and DF under soft-float: */
6639 || ((mode == DImode || mode == DFmode)
6640 /* Without ldrd, we use stm/ldm, which does not
6641 fare well with unaligned bits. */
6642 && (! TARGET_LDRD
6643 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6644 || TARGET_THUMB2))))
6645 return false;
6647 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6648 of which the (reg+high) gets turned into a reload add insn,
6649 we try to decompose the index into high/low values that can often
6650 also lead to better reload CSE.
6651 For example:
6652 ldr r0, [r2, #4100] // Offset too large
6653 ldr r1, [r2, #4104] // Offset too large
6655 is best reloaded as:
6656 add t1, r2, #4096
6657 ldr r0, [t1, #4]
6658 add t2, r2, #4096
6659 ldr r1, [t2, #8]
6661 which post-reload CSE can simplify in most cases to eliminate the
6662 second add instruction:
6663 add t1, r2, #4096
6664 ldr r0, [t1, #4]
6665 ldr r1, [t1, #8]
6667 The idea here is that we want to split out the bits of the constant
6668 as a mask, rather than as subtracting the maximum offset that the
6669 respective type of load/store used can handle.
6671 A negative low part can still be useful even when the overall offset
6672 is positive; sometimes this leads to an immediate
6673 that can be constructed with fewer instructions.
6674 For example:
6675 ldr r0, [r2, #0x3FFFFC]
6677 This is best reloaded as:
6678 add t1, r2, #0x400000
6679 ldr r0, [t1, #-4]
6681 The trick for spotting this for a load insn with N bits of offset
6682 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6683 negative offset that is going to make bit N and all the bits below
6684 it become zero in the remainder part.
6686 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6687 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6688 used in most cases of ARM load/store instructions. */
6690 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6691 (((VAL) & ((1 << (N)) - 1)) \
6692 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6693 : 0)
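/* Worked example (illustration): for VAL = 0x3FFFFC and N = 12 the low
   12 bits are non-zero and bit 12 is set, so the macro yields -4; the
   high part becomes 0x400000, giving exactly the add/ldr pair shown in
   the comment above.  */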
6695 if (coproc_p)
6697 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6699 /* NEON quad-word load/stores are made of two double-word accesses,
6700 so the valid index range is reduced by 8. Treat as 9-bit range if
6701 we go over it. */
6702 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6703 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6705 else if (GET_MODE_SIZE (mode) == 8)
6707 if (TARGET_LDRD)
6708 low = (TARGET_THUMB2
6709 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6710 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6711 else
6712 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6713 to access doublewords. The supported load/store offsets are
6714 -8, -4, and 4, which we try to produce here. */
6715 low = ((val & 0xf) ^ 0x8) - 0x8;
6717 else if (GET_MODE_SIZE (mode) < 8)
6719 /* NEON element load/stores do not have an offset. */
6720 if (TARGET_NEON_FP16 && mode == HFmode)
6721 return false;
6723 if (TARGET_THUMB2)
6725 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6726 Try the wider 12-bit range first, and re-try if the result
6727 is out of range. */
6728 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6729 if (low < -255)
6730 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
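/* E.g. val = 4100 (illustration): the 12-bit split gives low = -4092,
   which is out of range for Thumb-2 negative offsets, so the 8-bit
   retry gives low = 4 and high = 4096, i.e. add #4096 / ldr #4.  */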
6732 else
6734 if (mode == HImode || mode == HFmode)
6736 if (arm_arch4)
6737 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6738 else
6740 /* The storehi/movhi_bytes fallbacks can use only
6741 [-4094,+4094] of the full ldrb/strb index range. */
6742 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6743 if (low == 4095 || low == -4095)
6744 return false;
6747 else
6748 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6751 else
6752 return false;
6754 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6755 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6756 - (unsigned HOST_WIDE_INT) 0x80000000);
6757 /* Check for overflow or zero */
6758 if (low == 0 || high == 0 || (high + low != val))
6759 return false;
6761 /* Reload the high part into a base reg; leave the low part
6762 in the mem. */
6763 *p = gen_rtx_PLUS (GET_MODE (*p),
6764 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6765 GEN_INT (high)),
6766 GEN_INT (low));
6767 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6768 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6769 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6770 return true;
6773 return false;
6777 thumb_legitimize_reload_address (rtx *x_p,
6778 enum machine_mode mode,
6779 int opnum, int type,
6780 int ind_levels ATTRIBUTE_UNUSED)
6782 rtx x = *x_p;
6784 if (GET_CODE (x) == PLUS
6785 && GET_MODE_SIZE (mode) < 4
6786 && REG_P (XEXP (x, 0))
6787 && XEXP (x, 0) == stack_pointer_rtx
6788 && GET_CODE (XEXP (x, 1)) == CONST_INT
6789 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6791 rtx orig_x = x;
6793 x = copy_rtx (x);
6794 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6795 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6796 return x;
6799 /* If both registers are hi-regs, then it's better to reload the
6800 entire expression rather than each register individually. That
6801 only requires one reload register rather than two. */
6802 if (GET_CODE (x) == PLUS
6803 && REG_P (XEXP (x, 0))
6804 && REG_P (XEXP (x, 1))
6805 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6806 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6808 rtx orig_x = x;
6810 x = copy_rtx (x);
6811 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6812 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6813 return x;
6816 return NULL;
6819 /* Test for various thread-local symbols. */
6821 /* Return TRUE if X is a thread-local symbol. */
6823 static bool
6824 arm_tls_symbol_p (rtx x)
6826 if (! TARGET_HAVE_TLS)
6827 return false;
6829 if (GET_CODE (x) != SYMBOL_REF)
6830 return false;
6832 return SYMBOL_REF_TLS_MODEL (x) != 0;
6835 /* Helper for arm_tls_referenced_p. */
6837 static int
6838 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6840 if (GET_CODE (*x) == SYMBOL_REF)
6841 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6843 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6844 TLS offsets, not real symbol references. */
6845 if (GET_CODE (*x) == UNSPEC
6846 && XINT (*x, 1) == UNSPEC_TLS)
6847 return -1;
6849 return 0;
6852 /* Return TRUE if X contains any TLS symbol references. */
6854 bool
6855 arm_tls_referenced_p (rtx x)
6857 if (! TARGET_HAVE_TLS)
6858 return false;
6860 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6863 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6865 On the ARM, allow any integer (invalid ones are removed later by insn
6866 patterns), nice doubles and symbol_refs which refer to the function's
6867 constant pool XXX.
6869 When generating pic allow anything. */
6871 static bool
6872 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6874 /* At present, we have no support for Neon structure constants, so forbid
6875 them here. It might be possible to handle simple cases like 0 and -1
6876 in future. */
6877 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6878 return false;
6880 return flag_pic || !label_mentioned_p (x);
6883 static bool
6884 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6886 return (GET_CODE (x) == CONST_INT
6887 || GET_CODE (x) == CONST_DOUBLE
6888 || CONSTANT_ADDRESS_P (x)
6889 || flag_pic);
6892 static bool
6893 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6895 return (!arm_cannot_force_const_mem (mode, x)
6896 && (TARGET_32BIT
6897 ? arm_legitimate_constant_p_1 (mode, x)
6898 : thumb_legitimate_constant_p (mode, x)));
6901 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6903 static bool
6904 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6906 rtx base, offset;
6908 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6910 split_const (x, &base, &offset);
6911 if (GET_CODE (base) == SYMBOL_REF
6912 && !offset_within_block_p (base, INTVAL (offset)))
6913 return true;
6915 return arm_tls_referenced_p (x);
6918 #define REG_OR_SUBREG_REG(X) \
6919 (GET_CODE (X) == REG \
6920 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6922 #define REG_OR_SUBREG_RTX(X) \
6923 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6925 static inline int
6926 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6928 enum machine_mode mode = GET_MODE (x);
6929 int total;
6931 switch (code)
6933 case ASHIFT:
6934 case ASHIFTRT:
6935 case LSHIFTRT:
6936 case ROTATERT:
6937 case PLUS:
6938 case MINUS:
6939 case COMPARE:
6940 case NEG:
6941 case NOT:
6942 return COSTS_N_INSNS (1);
6944 case MULT:
6945 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6947 int cycles = 0;
6948 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6950 while (i)
6952 i >>= 2;
6953 cycles++;
6955 return COSTS_N_INSNS (2) + cycles;
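/* E.g. a multiply by 100 (0b1100100) is costed as COSTS_N_INSNS (2) + 4,
   since the loop above consumes two bits of the constant per
   iteration.  */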
6957 return COSTS_N_INSNS (1) + 16;
6959 case SET:
6960 return (COSTS_N_INSNS (1)
6961 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6962 + GET_CODE (SET_DEST (x)) == MEM));
6964 case CONST_INT:
6965 if (outer == SET)
6967 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6968 return 0;
6969 if (thumb_shiftable_const (INTVAL (x)))
6970 return COSTS_N_INSNS (2);
6971 return COSTS_N_INSNS (3);
6973 else if ((outer == PLUS || outer == COMPARE)
6974 && INTVAL (x) < 256 && INTVAL (x) > -256)
6975 return 0;
6976 else if ((outer == IOR || outer == XOR || outer == AND)
6977 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6978 return COSTS_N_INSNS (1);
6979 else if (outer == AND)
6981 int i;
6982 /* This duplicates the tests in the andsi3 expander. */
6983 for (i = 9; i <= 31; i++)
6984 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6985 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6986 return COSTS_N_INSNS (2);
6988 else if (outer == ASHIFT || outer == ASHIFTRT
6989 || outer == LSHIFTRT)
6990 return 0;
6991 return COSTS_N_INSNS (2);
6993 case CONST:
6994 case CONST_DOUBLE:
6995 case LABEL_REF:
6996 case SYMBOL_REF:
6997 return COSTS_N_INSNS (3);
6999 case UDIV:
7000 case UMOD:
7001 case DIV:
7002 case MOD:
7003 return 100;
7005 case TRUNCATE:
7006 return 99;
7008 case AND:
7009 case XOR:
7010 case IOR:
7011 /* XXX guess. */
7012 return 8;
7014 case MEM:
7015 /* XXX another guess. */
7016 /* Memory costs quite a lot for the first word, but subsequent words
7017 load at the equivalent of a single insn each. */
7018 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7019 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7020 ? 4 : 0));
7022 case IF_THEN_ELSE:
7023 /* XXX a guess. */
7024 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7025 return 14;
7026 return 2;
7028 case SIGN_EXTEND:
7029 case ZERO_EXTEND:
7030 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7031 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7033 if (mode == SImode)
7034 return total;
7036 if (arm_arch6)
7037 return total + COSTS_N_INSNS (1);
7039 /* Assume a two-shift sequence. Increase the cost slightly so
7040 we prefer actual shifts over an extend operation. */
7041 return total + 1 + COSTS_N_INSNS (2);
7043 default:
7044 return 99;
7048 static inline bool
7049 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7051 enum machine_mode mode = GET_MODE (x);
7052 enum rtx_code subcode;
7053 rtx operand;
7054 enum rtx_code code = GET_CODE (x);
7055 *total = 0;
7057 switch (code)
7059 case MEM:
7060 /* Memory costs quite a lot for the first word, but subsequent words
7061 load at the equivalent of a single insn each. */
7062 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
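/* E.g. a DImode load (two registers) is costed as COSTS_N_INSNS (4).  */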
7063 return true;
7065 case DIV:
7066 case MOD:
7067 case UDIV:
7068 case UMOD:
7069 if (TARGET_HARD_FLOAT && mode == SFmode)
7070 *total = COSTS_N_INSNS (2);
7071 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7072 *total = COSTS_N_INSNS (4);
7073 else
7074 *total = COSTS_N_INSNS (20);
7075 return false;
7077 case ROTATE:
7078 if (GET_CODE (XEXP (x, 1)) == REG)
7079 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7080 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7081 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7083 /* Fall through */
7084 case ROTATERT:
7085 if (mode != SImode)
7087 *total += COSTS_N_INSNS (4);
7088 return true;
7091 /* Fall through */
7092 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7093 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7094 if (mode == DImode)
7096 *total += COSTS_N_INSNS (3);
7097 return true;
7100 *total += COSTS_N_INSNS (1);
7101 /* Increase the cost of complex shifts because they aren't any faster,
7102 and they reduce dual-issue opportunities. */
7103 if (arm_tune_cortex_a9
7104 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7105 ++*total;
7107 return true;
7109 case MINUS:
7110 if (mode == DImode)
7112 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7113 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7114 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7116 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7117 return true;
7120 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7121 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7123 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7124 return true;
7127 return false;
7130 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7132 if (TARGET_HARD_FLOAT
7133 && (mode == SFmode
7134 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7136 *total = COSTS_N_INSNS (1);
7137 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7138 && arm_const_double_rtx (XEXP (x, 0)))
7140 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7141 return true;
7144 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7145 && arm_const_double_rtx (XEXP (x, 1)))
7147 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7148 return true;
7151 return false;
7153 *total = COSTS_N_INSNS (20);
7154 return false;
7157 *total = COSTS_N_INSNS (1);
7158 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7159 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7161 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7162 return true;
7165 subcode = GET_CODE (XEXP (x, 1));
7166 if (subcode == ASHIFT || subcode == ASHIFTRT
7167 || subcode == LSHIFTRT
7168 || subcode == ROTATE || subcode == ROTATERT)
7170 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7171 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7172 return true;
7175 /* A shift as a part of RSB costs no more than RSB itself. */
7176 if (GET_CODE (XEXP (x, 0)) == MULT
7177 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7179 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7180 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7181 return true;
7184 if (subcode == MULT
7185 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7187 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7188 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7189 return true;
7192 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7193 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7195 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7196 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7197 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7198 *total += COSTS_N_INSNS (1);
7200 return true;
7203 /* Fall through */
7205 case PLUS:
7206 if (code == PLUS && arm_arch6 && mode == SImode
7207 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7208 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7210 *total = COSTS_N_INSNS (1);
7211 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7212 0, speed);
7213 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7214 return true;
7217 /* MLA: All arguments must be registers. We filter out
7218 multiplication by a power of two, so that we fall through to
7219 the code below. */
7220 if (GET_CODE (XEXP (x, 0)) == MULT
7221 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7223 /* The cost comes from the cost of the multiply. */
7224 return false;
7227 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7229 if (TARGET_HARD_FLOAT
7230 && (mode == SFmode
7231 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7233 *total = COSTS_N_INSNS (1);
7234 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7235 && arm_const_double_rtx (XEXP (x, 1)))
7237 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7238 return true;
7241 return false;
7244 *total = COSTS_N_INSNS (20);
7245 return false;
7248 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7249 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7251 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7252 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7253 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7254 *total += COSTS_N_INSNS (1);
7255 return true;
7258 /* Fall through */
7260 case AND: case XOR: case IOR:
7262 /* Normally the frame registers will be split into reg+const during
7263 reload, so it is a bad idea to combine them with other instructions,
7264 since then they might not be moved outside of loops. As a compromise
7265 we allow integration with ops that have a constant as their second
7266 operand. */
7267 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7268 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7269 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7270 *total = COSTS_N_INSNS (1);
7272 if (mode == DImode)
7274 *total += COSTS_N_INSNS (2);
7275 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7276 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7278 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7279 return true;
7282 return false;
7285 *total += COSTS_N_INSNS (1);
7286 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7287 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7289 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7290 return true;
7292 subcode = GET_CODE (XEXP (x, 0));
7293 if (subcode == ASHIFT || subcode == ASHIFTRT
7294 || subcode == LSHIFTRT
7295 || subcode == ROTATE || subcode == ROTATERT)
7297 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7298 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7299 return true;
7302 if (subcode == MULT
7303 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7305 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7306 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7307 return true;
7310 if (subcode == UMIN || subcode == UMAX
7311 || subcode == SMIN || subcode == SMAX)
7313 *total = COSTS_N_INSNS (3);
7314 return true;
7317 return false;
7319 case MULT:
7320 /* This should have been handled by the CPU specific routines. */
7321 gcc_unreachable ();
7323 case TRUNCATE:
7324 if (arm_arch3m && mode == SImode
7325 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7326 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7327 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7328 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7329 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7330 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7332 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7333 return true;
7335 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7336 return false;
7338 case NEG:
7339 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7341 if (TARGET_HARD_FLOAT
7342 && (mode == SFmode
7343 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7345 *total = COSTS_N_INSNS (1);
7346 return false;
7348 *total = COSTS_N_INSNS (2);
7349 return false;
7352 /* Fall through */
7353 case NOT:
7354 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7355 if (mode == SImode && code == NOT)
7357 subcode = GET_CODE (XEXP (x, 0));
7358 if (subcode == ASHIFT || subcode == ASHIFTRT
7359 || subcode == LSHIFTRT
7360 || subcode == ROTATE || subcode == ROTATERT
7361 || (subcode == MULT
7362 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7364 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7365 /* Register shifts cost an extra cycle. */
7366 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7367 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7368 subcode, 1, speed);
7369 return true;
7373 return false;
7375 case IF_THEN_ELSE:
7376 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7378 *total = COSTS_N_INSNS (4);
7379 return true;
7382 operand = XEXP (x, 0);
7384 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7385 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7386 && GET_CODE (XEXP (operand, 0)) == REG
7387 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7388 *total += COSTS_N_INSNS (1);
7389 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7390 + rtx_cost (XEXP (x, 2), code, 2, speed));
7391 return true;
7393 case NE:
7394 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7396 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7397 return true;
7399 goto scc_insn;
7401 case GE:
7402 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7403 && mode == SImode && XEXP (x, 1) == const0_rtx)
7405 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7406 return true;
7408 goto scc_insn;
7410 case LT:
7411 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7412 && mode == SImode && XEXP (x, 1) == const0_rtx)
7414 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7415 return true;
7417 goto scc_insn;
7419 case EQ:
7420 case GT:
7421 case LE:
7422 case GEU:
7423 case LTU:
7424 case GTU:
7425 case LEU:
7426 case UNORDERED:
7427 case ORDERED:
7428 case UNEQ:
7429 case UNGE:
7430 case UNLT:
7431 case UNGT:
7432 case UNLE:
7433 scc_insn:
7434 /* SCC insns. If the comparison has already been
7435 performed, they cost 2 instructions. Otherwise they need
7436 an additional comparison before them. */
7437 *total = COSTS_N_INSNS (2);
7438 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7440 return true;
7443 /* Fall through */
7444 case COMPARE:
7445 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7447 *total = 0;
7448 return true;
7451 *total += COSTS_N_INSNS (1);
7452 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7453 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7455 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7456 return true;
7459 subcode = GET_CODE (XEXP (x, 0));
7460 if (subcode == ASHIFT || subcode == ASHIFTRT
7461 || subcode == LSHIFTRT
7462 || subcode == ROTATE || subcode == ROTATERT)
7464 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7465 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7466 return true;
7469 if (subcode == MULT
7470 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7472 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7473 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7474 return true;
7477 return false;
7479 case UMIN:
7480 case UMAX:
7481 case SMIN:
7482 case SMAX:
7483 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7484 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7485 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7486 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7487 return true;
7489 case ABS:
7490 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7492 if (TARGET_HARD_FLOAT
7493 && (mode == SFmode
7494 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7496 *total = COSTS_N_INSNS (1);
7497 return false;
7499 *total = COSTS_N_INSNS (20);
7500 return false;
7502 *total = COSTS_N_INSNS (1);
7503 if (mode == DImode)
7504 *total += COSTS_N_INSNS (3);
7505 return false;
7507 case SIGN_EXTEND:
7508 case ZERO_EXTEND:
7509 *total = 0;
7510 if (GET_MODE_CLASS (mode) == MODE_INT)
7512 rtx op = XEXP (x, 0);
7513 enum machine_mode opmode = GET_MODE (op);
7515 if (mode == DImode)
7516 *total += COSTS_N_INSNS (1);
7518 if (opmode != SImode)
7520 if (MEM_P (op))
7522 /* If !arm_arch4, we use one of the extendhisi2_mem
7523 or movhi_bytes patterns for HImode. For a QImode
7524 sign extension, we first zero-extend from memory
7525 and then perform a shift sequence. */
7526 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7527 *total += COSTS_N_INSNS (2);
7529 else if (arm_arch6)
7530 *total += COSTS_N_INSNS (1);
7532 /* We don't have the necessary insn, so we need to perform some
7533 other operation. */
7534 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7535 /* An and with constant 255. */
7536 *total += COSTS_N_INSNS (1);
7537 else
7538 /* A shift sequence. Increase costs slightly to avoid
7539 combining two shifts into an extend operation. */
7540 *total += COSTS_N_INSNS (2) + 1;
7543 return false;
7546 switch (GET_MODE (XEXP (x, 0)))
7548 case V8QImode:
7549 case V4HImode:
7550 case V2SImode:
7551 case V4QImode:
7552 case V2HImode:
7553 *total = COSTS_N_INSNS (1);
7554 return false;
7556 default:
7557 gcc_unreachable ();
7559 gcc_unreachable ();
7561 case ZERO_EXTRACT:
7562 case SIGN_EXTRACT:
7563 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7564 return true;
7566 case CONST_INT:
7567 if (const_ok_for_arm (INTVAL (x))
7568 || const_ok_for_arm (~INTVAL (x)))
7569 *total = COSTS_N_INSNS (1);
7570 else
7571 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7572 INTVAL (x), NULL_RTX,
7573 NULL_RTX, 0, 0));
7574 return true;
7576 case CONST:
7577 case LABEL_REF:
7578 case SYMBOL_REF:
7579 *total = COSTS_N_INSNS (3);
7580 return true;
7582 case HIGH:
7583 *total = COSTS_N_INSNS (1);
7584 return true;
7586 case LO_SUM:
7587 *total = COSTS_N_INSNS (1);
7588 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7589 return true;
7591 case CONST_DOUBLE:
7592 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7593 && (mode == SFmode || !TARGET_VFP_SINGLE))
7594 *total = COSTS_N_INSNS (1);
7595 else
7596 *total = COSTS_N_INSNS (4);
7597 return true;
7599 case SET:
7600 return false;
7602 case UNSPEC:
7603 /* We cost this as high as our memory costs to allow this to
7604 be hoisted from loops. */
7605 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7607 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7609 return true;
7611 default:
7612 *total = COSTS_N_INSNS (4);
7613 return false;
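/* A note on units (illustrative aside, assuming the usual definition of
   COSTS_N_INSNS in rtl.h as (N) * 4): COSTS_N_INSNS (2) is 8 cost units,
   and expressions such as "COSTS_N_INSNS (2) + 1" used above deliberately
   fall just between whole-instruction steps so that, e.g., a shift
   sequence is not merged back into an extend. See rtl.h for the
   authoritative definition. */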
7617 /* Estimate the size cost of Thumb-1 instructions.
7618 For now most of the code is copied from thumb1_rtx_costs. We need
7619 finer-grained tuning when we have more related test cases. */
7620 static inline int
7621 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7623 enum machine_mode mode = GET_MODE (x);
7625 switch (code)
7627 case ASHIFT:
7628 case ASHIFTRT:
7629 case LSHIFTRT:
7630 case ROTATERT:
7631 case PLUS:
7632 case MINUS:
7633 case COMPARE:
7634 case NEG:
7635 case NOT:
7636 return COSTS_N_INSNS (1);
7638 case MULT:
7639 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7641 /* The Thumb-1 mul instruction cannot operate on a constant. We must
7642 load it into a register first. */
7643 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7644 return COSTS_N_INSNS (1) + const_size;
7646 return COSTS_N_INSNS (1);
7648 case SET:
7649 return (COSTS_N_INSNS (1)
7650 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7651 + (GET_CODE (SET_DEST (x)) == MEM)));
7653 case CONST_INT:
7654 if (outer == SET)
7656 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7657 return COSTS_N_INSNS (1);
7658 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7659 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7660 return COSTS_N_INSNS (2);
7661 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7662 if (thumb_shiftable_const (INTVAL (x)))
7663 return COSTS_N_INSNS (2);
7664 return COSTS_N_INSNS (3);
7666 else if ((outer == PLUS || outer == COMPARE)
7667 && INTVAL (x) < 256 && INTVAL (x) > -256)
7668 return 0;
7669 else if ((outer == IOR || outer == XOR || outer == AND)
7670 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7671 return COSTS_N_INSNS (1);
7672 else if (outer == AND)
7674 int i;
7675 /* This duplicates the tests in the andsi3 expander. */
7676 for (i = 9; i <= 31; i++)
7677 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7678 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7679 return COSTS_N_INSNS (2);
7681 else if (outer == ASHIFT || outer == ASHIFTRT
7682 || outer == LSHIFTRT)
7683 return 0;
7684 return COSTS_N_INSNS (2);
7686 case CONST:
7687 case CONST_DOUBLE:
7688 case LABEL_REF:
7689 case SYMBOL_REF:
7690 return COSTS_N_INSNS (3);
7692 case UDIV:
7693 case UMOD:
7694 case DIV:
7695 case MOD:
7696 return 100;
7698 case TRUNCATE:
7699 return 99;
7701 case AND:
7702 case XOR:
7703 case IOR:
7704 /* XXX guess. */
7705 return 8;
7707 case MEM:
7708 /* XXX another guess. */
7709 /* Memory costs quite a lot for the first word, but subsequent words
7710 load at the equivalent of a single insn each. */
7711 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7712 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7713 ? 4 : 0));
7715 case IF_THEN_ELSE:
7716 /* XXX a guess. */
7717 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7718 return 14;
7719 return 2;
7721 case ZERO_EXTEND:
7722 /* XXX still guessing. */
7723 switch (GET_MODE (XEXP (x, 0)))
7725 case QImode:
7726 return (1 + (mode == DImode ? 4 : 0)
7727 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7729 case HImode:
7730 return (4 + (mode == DImode ? 4 : 0)
7731 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7733 case SImode:
7734 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7736 default:
7737 return 99;
7740 default:
7741 return 99;
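/* Illustrative example of the SET costing above (a sketch, not part of
   the tuning tables): for a simple store such as

     (set (mem:SI (reg:SI 0)) (reg:SI 1))

   exactly one of the two operands is a MEM, so the cost is
   COSTS_N_INSNS (1) + 4; a register-to-register SET costs just
   COSTS_N_INSNS (1). */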
7745 /* RTX costs when optimizing for size. */
7746 static bool
7747 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7748 int *total)
7750 enum machine_mode mode = GET_MODE (x);
7751 if (TARGET_THUMB1)
7753 *total = thumb1_size_rtx_costs (x, code, outer_code);
7754 return true;
7757 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7758 switch (code)
7760 case MEM:
7761 /* A memory access costs one insn if the mode is small or the address is
7762 a single register; otherwise it costs one insn per word. */
7763 if (REG_P (XEXP (x, 0)))
7764 *total = COSTS_N_INSNS (1);
7765 else if (flag_pic
7766 && GET_CODE (XEXP (x, 0)) == PLUS
7767 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7768 /* This will be split into two instructions.
7769 See arm.md:calculate_pic_address. */
7770 *total = COSTS_N_INSNS (2);
7771 else
7772 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7773 return true;
7775 case DIV:
7776 case MOD:
7777 case UDIV:
7778 case UMOD:
7779 /* Needs a libcall, so it costs about this. */
7780 *total = COSTS_N_INSNS (2);
7781 return false;
7783 case ROTATE:
7784 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7786 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7787 return true;
7789 /* Fall through */
7790 case ROTATERT:
7791 case ASHIFT:
7792 case LSHIFTRT:
7793 case ASHIFTRT:
7794 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7796 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7797 return true;
7799 else if (mode == SImode)
7801 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7802 /* Slightly disparage register shifts, but not by much. */
7803 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7804 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7805 return true;
7808 /* Needs a libcall. */
7809 *total = COSTS_N_INSNS (2);
7810 return false;
7812 case MINUS:
7813 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7814 && (mode == SFmode || !TARGET_VFP_SINGLE))
7816 *total = COSTS_N_INSNS (1);
7817 return false;
7820 if (mode == SImode)
7822 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7823 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7825 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7826 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7827 || subcode1 == ROTATE || subcode1 == ROTATERT
7828 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7829 || subcode1 == ASHIFTRT)
7831 /* It's just the cost of the two operands. */
7832 *total = 0;
7833 return false;
7836 *total = COSTS_N_INSNS (1);
7837 return false;
7840 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7841 return false;
7843 case PLUS:
7844 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7845 && (mode == SFmode || !TARGET_VFP_SINGLE))
7847 *total = COSTS_N_INSNS (1);
7848 return false;
7851 /* A shift as a part of ADD costs nothing. */
7852 if (GET_CODE (XEXP (x, 0)) == MULT
7853 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7855 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7856 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7857 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7858 return true;
7861 /* Fall through */
7862 case AND: case XOR: case IOR:
7863 if (mode == SImode)
7865 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7867 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7868 || subcode == LSHIFTRT || subcode == ASHIFTRT
7869 || (code == AND && subcode == NOT))
7871 /* It's just the cost of the two operands. */
7872 *total = 0;
7873 return false;
7877 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7878 return false;
7880 case MULT:
7881 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7882 return false;
7884 case NEG:
7885 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7886 && (mode == SFmode || !TARGET_VFP_SINGLE))
7888 *total = COSTS_N_INSNS (1);
7889 return false;
7892 /* Fall through */
7893 case NOT:
7894 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7896 return false;
7898 case IF_THEN_ELSE:
7899 *total = 0;
7900 return false;
7902 case COMPARE:
7903 if (cc_register (XEXP (x, 0), VOIDmode))
7904 * total = 0;
7905 else
7906 *total = COSTS_N_INSNS (1);
7907 return false;
7909 case ABS:
7910 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7911 && (mode == SFmode || !TARGET_VFP_SINGLE))
7912 *total = COSTS_N_INSNS (1);
7913 else
7914 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7915 return false;
7917 case SIGN_EXTEND:
7918 case ZERO_EXTEND:
7919 return arm_rtx_costs_1 (x, outer_code, total, 0);
7921 case CONST_INT:
7922 if (const_ok_for_arm (INTVAL (x)))
7923 /* A multiplication by a constant requires another instruction
7924 to load the constant to a register. */
7925 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7926 ? 1 : 0);
7927 else if (const_ok_for_arm (~INTVAL (x)))
7928 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7929 else if (const_ok_for_arm (-INTVAL (x)))
7931 if (outer_code == COMPARE || outer_code == PLUS
7932 || outer_code == MINUS)
7933 *total = 0;
7934 else
7935 *total = COSTS_N_INSNS (1);
7937 else
7938 *total = COSTS_N_INSNS (2);
7939 return true;
7941 case CONST:
7942 case LABEL_REF:
7943 case SYMBOL_REF:
7944 *total = COSTS_N_INSNS (2);
7945 return true;
7947 case CONST_DOUBLE:
7948 *total = COSTS_N_INSNS (4);
7949 return true;
7951 case HIGH:
7952 case LO_SUM:
7953 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7954 cost of these slightly. */
7955 *total = COSTS_N_INSNS (1) + 1;
7956 return true;
7958 case SET:
7959 return false;
7961 default:
7962 if (mode != VOIDmode)
7963 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7964 else
7965 *total = COSTS_N_INSNS (4); /* Who knows? */
7966 return false;
7970 /* Top-level RTX cost function: dispatch to the size costs when optimizing for size, otherwise to the per-core speed costs. */
7971 static bool
7972 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7973 int *total, bool speed)
7975 if (!speed)
7976 return arm_size_rtx_costs (x, (enum rtx_code) code,
7977 (enum rtx_code) outer_code, total);
7978 else
7979 return current_tune->rtx_costs (x, (enum rtx_code) code,
7980 (enum rtx_code) outer_code,
7981 total, speed);
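/* For orientation (a sketch of how this is wired up, assuming the usual
   target-hook setup elsewhere in this file):

     #define TARGET_RTX_COSTS arm_rtx_costs

   so the per-core cost functions below are only reached through this
   dispatcher, via current_tune->rtx_costs, when optimizing for speed. */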
7984 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7985 supported on any "slowmul" cores, so it can be ignored. */
7987 static bool
7988 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7989 int *total, bool speed)
7991 enum machine_mode mode = GET_MODE (x);
7993 if (TARGET_THUMB)
7995 *total = thumb1_rtx_costs (x, code, outer_code);
7996 return true;
7999 switch (code)
8001 case MULT:
8002 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8003 || mode == DImode)
8005 *total = COSTS_N_INSNS (20);
8006 return false;
8009 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8011 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8012 & (unsigned HOST_WIDE_INT) 0xffffffff);
8013 int cost, const_ok = const_ok_for_arm (i);
8014 int j, booth_unit_size;
8016 /* Tune as appropriate. */
8017 cost = const_ok ? 4 : 8;
8018 booth_unit_size = 2;
8019 for (j = 0; i && j < 32; j += booth_unit_size)
8021 i >>= booth_unit_size;
8022 cost++;
8025 *total = COSTS_N_INSNS (cost);
8026 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8027 return true;
8030 *total = COSTS_N_INSNS (20);
8031 return false;
8033 default:
8034 return arm_rtx_costs_1 (x, outer_code, total, speed);
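/* Worked example of the Booth-style loop above (illustrative only): for
   (mult (reg:SI r) (const_int 0x65)), const_ok_for_arm (0x65) holds, so
   the base cost is 4; the loop then shifts 0x65 right two bits at a time
   (0x19, 0x6, 0x1, 0x0), adding one per step, giving a total of
   COSTS_N_INSNS (8) plus the cost of operand 0. */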
8039 /* RTX cost for cores with a fast multiply unit (M variants). */
8041 static bool
8042 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8043 int *total, bool speed)
8045 enum machine_mode mode = GET_MODE (x);
8047 if (TARGET_THUMB1)
8049 *total = thumb1_rtx_costs (x, code, outer_code);
8050 return true;
8053 /* ??? should thumb2 use different costs? */
8054 switch (code)
8056 case MULT:
8057 /* There is no point basing this on the tuning, since it is always the
8058 fast variant if it exists at all. */
8059 if (mode == DImode
8060 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8061 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8062 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8064 *total = COSTS_N_INSNS(2);
8065 return false;
8069 if (mode == DImode)
8071 *total = COSTS_N_INSNS (5);
8072 return false;
8075 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8077 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8078 & (unsigned HOST_WIDE_INT) 0xffffffff);
8079 int cost, const_ok = const_ok_for_arm (i);
8080 int j, booth_unit_size;
8082 /* Tune as appropriate. */
8083 cost = const_ok ? 4 : 8;
8084 booth_unit_size = 8;
8085 for (j = 0; i && j < 32; j += booth_unit_size)
8087 i >>= booth_unit_size;
8088 cost++;
8091 *total = COSTS_N_INSNS(cost);
8092 return false;
8095 if (mode == SImode)
8097 *total = COSTS_N_INSNS (4);
8098 return false;
8101 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8103 if (TARGET_HARD_FLOAT
8104 && (mode == SFmode
8105 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8107 *total = COSTS_N_INSNS (1);
8108 return false;
8112 /* Requires a lib call */
8113 *total = COSTS_N_INSNS (20);
8114 return false;
8116 default:
8117 return arm_rtx_costs_1 (x, outer_code, total, speed);
8122 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8123 so it can be ignored. */
8125 static bool
8126 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8127 int *total, bool speed)
8129 enum machine_mode mode = GET_MODE (x);
8131 if (TARGET_THUMB)
8133 *total = thumb1_rtx_costs (x, code, outer_code);
8134 return true;
8137 switch (code)
8139 case COMPARE:
8140 if (GET_CODE (XEXP (x, 0)) != MULT)
8141 return arm_rtx_costs_1 (x, outer_code, total, speed);
8143 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8144 will stall until the multiplication is complete. */
8145 *total = COSTS_N_INSNS (3);
8146 return false;
8148 case MULT:
8149 /* There is no point basing this on the tuning, since it is always the
8150 fast variant if it exists at all. */
8151 if (mode == DImode
8152 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8153 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8154 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8156 *total = COSTS_N_INSNS (2);
8157 return false;
8161 if (mode == DImode)
8163 *total = COSTS_N_INSNS (5);
8164 return false;
8167 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8169 /* If operand 1 is a constant we can more accurately
8170 calculate the cost of the multiply. The multiplier can
8171 retire 15 bits on the first cycle and a further 12 on the
8172 second. We do, of course, have to load the constant into
8173 a register first. */
8174 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8175 /* There's a general overhead of one cycle. */
8176 int cost = 1;
8177 unsigned HOST_WIDE_INT masked_const;
8179 if (i & 0x80000000)
8180 i = ~i;
8182 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8184 masked_const = i & 0xffff8000;
8185 if (masked_const != 0)
8187 cost++;
8188 masked_const = i & 0xf8000000;
8189 if (masked_const != 0)
8190 cost++;
8192 *total = COSTS_N_INSNS (cost);
8193 return false;
8196 if (mode == SImode)
8198 *total = COSTS_N_INSNS (3);
8199 return false;
8202 /* Requires a lib call */
8203 *total = COSTS_N_INSNS (20);
8204 return false;
8206 default:
8207 return arm_rtx_costs_1 (x, outer_code, total, speed);
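/* Worked example of the XScale multiply costing above (illustrative
   only): for a constant multiplier of 0x12345, bit 31 is clear so the
   value is used as-is; 0x12345 & 0xffff8000 is non-zero (one extra
   cycle) while 0x12345 & 0xf8000000 is zero, so the result is the one
   cycle of general overhead plus one, i.e. COSTS_N_INSNS (2). */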
8212 /* RTX costs for 9e (and later) cores. */
8214 static bool
8215 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8216 int *total, bool speed)
8218 enum machine_mode mode = GET_MODE (x);
8220 if (TARGET_THUMB1)
8222 switch (code)
8224 case MULT:
8225 *total = COSTS_N_INSNS (3);
8226 return true;
8228 default:
8229 *total = thumb1_rtx_costs (x, code, outer_code);
8230 return true;
8234 switch (code)
8236 case MULT:
8237 /* There is no point basing this on the tuning, since it is always the
8238 fast variant if it exists at all. */
8239 if (mode == DImode
8240 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8241 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8242 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8244 *total = COSTS_N_INSNS (2);
8245 return false;
8249 if (mode == DImode)
8251 *total = COSTS_N_INSNS (5);
8252 return false;
8255 if (mode == SImode)
8257 *total = COSTS_N_INSNS (2);
8258 return false;
8261 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8263 if (TARGET_HARD_FLOAT
8264 && (mode == SFmode
8265 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8267 *total = COSTS_N_INSNS (1);
8268 return false;
8272 *total = COSTS_N_INSNS (20);
8273 return false;
8275 default:
8276 return arm_rtx_costs_1 (x, outer_code, total, speed);
8279 /* All address computations that can be done are free, but rtx cost returns
8280 the same for practically all of them. So we weight the different types
8281 of address here in the order (most pref first):
8282 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8283 static inline int
8284 arm_arm_address_cost (rtx x)
8286 enum rtx_code c = GET_CODE (x);
8288 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8289 return 0;
8290 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8291 return 10;
8293 if (c == PLUS)
8295 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8296 return 2;
8298 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8299 return 3;
8301 return 4;
8304 return 6;
8307 static inline int
8308 arm_thumb_address_cost (rtx x)
8310 enum rtx_code c = GET_CODE (x);
8312 if (c == REG)
8313 return 1;
8314 if (c == PLUS
8315 && GET_CODE (XEXP (x, 0)) == REG
8316 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8317 return 1;
8319 return 2;
8322 static int
8323 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8325 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8328 /* Adjust cost hook for XScale. */
8329 static bool
8330 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8332 /* Some true dependencies can have a higher cost depending
8333 on precisely how certain input operands are used. */
8334 if (REG_NOTE_KIND(link) == 0
8335 && recog_memoized (insn) >= 0
8336 && recog_memoized (dep) >= 0)
8338 int shift_opnum = get_attr_shift (insn);
8339 enum attr_type attr_type = get_attr_type (dep);
8341 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8342 operand for INSN. If we have a shifted input operand and the
8343 instruction we depend on is another ALU instruction, then we may
8344 have to account for an additional stall. */
8345 if (shift_opnum != 0
8346 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8348 rtx shifted_operand;
8349 int opno;
8351 /* Get the shifted operand. */
8352 extract_insn (insn);
8353 shifted_operand = recog_data.operand[shift_opnum];
8355 /* Iterate over all the operands in DEP. If we write an operand
8356 that overlaps with SHIFTED_OPERAND, then we have to increase the
8357 cost of this dependency. */
8358 extract_insn (dep);
8359 preprocess_constraints ();
8360 for (opno = 0; opno < recog_data.n_operands; opno++)
8362 /* We can ignore strict inputs. */
8363 if (recog_data.operand_type[opno] == OP_IN)
8364 continue;
8366 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8367 shifted_operand))
8369 *cost = 2;
8370 return false;
8375 return true;
8378 /* Adjust cost hook for Cortex A9. */
8379 static bool
8380 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8382 switch (REG_NOTE_KIND (link))
8384 case REG_DEP_ANTI:
8385 *cost = 0;
8386 return false;
8388 case REG_DEP_TRUE:
8389 case REG_DEP_OUTPUT:
8390 if (recog_memoized (insn) >= 0
8391 && recog_memoized (dep) >= 0)
8393 if (GET_CODE (PATTERN (insn)) == SET)
8395 if (GET_MODE_CLASS
8396 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8397 || GET_MODE_CLASS
8398 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8400 enum attr_type attr_type_insn = get_attr_type (insn);
8401 enum attr_type attr_type_dep = get_attr_type (dep);
8403 /* By default all dependencies of the form
8404 s0 = s0 <op> s1
8405 s0 = s0 <op> s2
8406 have an extra latency of 1 cycle because
8407 of the input and output dependency in this
8408 case. However this gets modeled as a true
8409 dependency, hence all these checks. */
8410 if (REG_P (SET_DEST (PATTERN (insn)))
8411 && REG_P (SET_DEST (PATTERN (dep)))
8412 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8413 SET_DEST (PATTERN (dep))))
8415 /* FMACS is a special case where the dependent
8416 instruction can be issued 3 cycles before
8417 the normal latency in case of an output
8418 dependency. */
8419 if ((attr_type_insn == TYPE_FMACS
8420 || attr_type_insn == TYPE_FMACD)
8421 && (attr_type_dep == TYPE_FMACS
8422 || attr_type_dep == TYPE_FMACD))
8424 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8425 *cost = insn_default_latency (dep) - 3;
8426 else
8427 *cost = insn_default_latency (dep);
8428 return false;
8430 else
8432 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8433 *cost = insn_default_latency (dep) + 1;
8434 else
8435 *cost = insn_default_latency (dep);
8437 return false;
8442 break;
8444 default:
8445 gcc_unreachable ();
8448 return true;
8451 /* Adjust cost hook for FA726TE. */
8452 static bool
8453 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8455 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8456 followed by a predicated one) has a penalty of 3. */
8457 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8458 && recog_memoized (insn) >= 0
8459 && recog_memoized (dep) >= 0
8460 && get_attr_conds (dep) == CONDS_SET)
8462 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8463 if (get_attr_conds (insn) == CONDS_USE
8464 && get_attr_type (insn) != TYPE_BRANCH)
8466 *cost = 3;
8467 return false;
8470 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8471 || get_attr_conds (insn) == CONDS_USE)
8473 *cost = 0;
8474 return false;
8478 return true;
8481 /* Implement TARGET_REGISTER_MOVE_COST.
8483 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
8484 a move is typically more expensive than a single memory access. We set
8485 the cost below that of two memory accesses so that floating-point to
8486 integer conversion does not go through memory. */
8489 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8490 reg_class_t from, reg_class_t to)
8492 if (TARGET_32BIT)
8494 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8495 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8496 return 15;
8497 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8498 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8499 return 4;
8500 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8501 return 20;
8502 else
8503 return 2;
8505 else
8507 if (from == HI_REGS || to == HI_REGS)
8508 return 4;
8509 else
8510 return 2;
8514 /* Implement TARGET_MEMORY_MOVE_COST. */
8517 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8518 bool in ATTRIBUTE_UNUSED)
8520 if (TARGET_32BIT)
8521 return 10;
8522 else
8524 if (GET_MODE_SIZE (mode) < 4)
8525 return 8;
8526 else
8527 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
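/* Taken together with arm_register_move_cost above: on TARGET_32BIT a
   memory move costs 10, so the 15 used for VFP<->core moves is indeed
   below the cost of two memory accesses (20), which is what keeps
   float-to-integer conversions out of memory. */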
8531 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8532 It corrects the value of COST based on the relationship between
8533 INSN and DEP through the dependence LINK. It returns the new
8534 value. There is a per-core adjust_cost hook to adjust scheduler costs
8535 and the per-core hook can choose to completely override the generic
8536 adjust_cost function. Only put bits of code into arm_adjust_cost that
8537 are common across all cores. */
8538 static int
8539 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8541 rtx i_pat, d_pat;
8543 /* When generating Thumb-1 code, we want to place flag-setting operations
8544 close to a conditional branch which depends on them, so that we can
8545 omit the comparison. */
8546 if (TARGET_THUMB1
8547 && REG_NOTE_KIND (link) == 0
8548 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8549 && recog_memoized (dep) >= 0
8550 && get_attr_conds (dep) == CONDS_SET)
8551 return 0;
8553 if (current_tune->sched_adjust_cost != NULL)
8555 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8556 return cost;
8559 /* XXX Is this strictly true? */
8560 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8561 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8562 return 0;
8564 /* Call insns don't incur a stall, even if they follow a load. */
8565 if (REG_NOTE_KIND (link) == 0
8566 && GET_CODE (insn) == CALL_INSN)
8567 return 1;
8569 if ((i_pat = single_set (insn)) != NULL
8570 && GET_CODE (SET_SRC (i_pat)) == MEM
8571 && (d_pat = single_set (dep)) != NULL
8572 && GET_CODE (SET_DEST (d_pat)) == MEM)
8574 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8575 /* This is a load after a store; there is no conflict if the load reads
8576 from a cached area. Assume that loads from the stack and from the
8577 constant pool are cached, and that others will miss. This is a
8578 hack. */
8580 if ((GET_CODE (src_mem) == SYMBOL_REF
8581 && CONSTANT_POOL_ADDRESS_P (src_mem))
8582 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8583 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8584 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8585 return 1;
8588 return cost;
8591 static int
8592 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8594 if (TARGET_32BIT)
8595 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8596 else
8597 return (optimize > 0) ? 2 : 0;
8600 static int
8601 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8603 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8606 static bool fp_consts_inited = false;
8608 static REAL_VALUE_TYPE value_fp0;
8610 static void
8611 init_fp_table (void)
8613 REAL_VALUE_TYPE r;
8615 r = REAL_VALUE_ATOF ("0", DFmode);
8616 value_fp0 = r;
8617 fp_consts_inited = true;
8620 /* Return TRUE if rtx X is a valid immediate FP constant. */
8622 arm_const_double_rtx (rtx x)
8624 REAL_VALUE_TYPE r;
8626 if (!fp_consts_inited)
8627 init_fp_table ();
8629 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8630 if (REAL_VALUE_MINUS_ZERO (r))
8631 return 0;
8633 if (REAL_VALUES_EQUAL (r, value_fp0))
8634 return 1;
8636 return 0;
8639 /* VFPv3 has a fairly wide range of representable immediates, formed from
8640 "quarter-precision" floating-point values. These can be evaluated using this
8641 formula (with ^ for exponentiation):
8643 -1^s * n * 2^-r
8645 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8646 16 <= n <= 31 and 0 <= r <= 7.
8648 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8650 - A (most-significant) is the sign bit.
8651 - BCD are the exponent (encoded as r XOR 3).
8652 - EFGH are the mantissa (encoded as n - 16).
8655 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8656 fconst[sd] instruction, or -1 if X isn't suitable. */
8657 static int
8658 vfp3_const_double_index (rtx x)
8660 REAL_VALUE_TYPE r, m;
8661 int sign, exponent;
8662 unsigned HOST_WIDE_INT mantissa, mant_hi;
8663 unsigned HOST_WIDE_INT mask;
8664 HOST_WIDE_INT m1, m2;
8665 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8667 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8668 return -1;
8670 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8672 /* We can't represent these things, so detect them first. */
8673 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8674 return -1;
8676 /* Extract sign, exponent and mantissa. */
8677 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8678 r = real_value_abs (&r);
8679 exponent = REAL_EXP (&r);
8680 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8681 highest (sign) bit, with a fixed binary point at bit point_pos.
8682 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8683 bits for the mantissa, this may fail (low bits would be lost). */
8684 real_ldexp (&m, &r, point_pos - exponent);
8685 REAL_VALUE_TO_INT (&m1, &m2, m);
8686 mantissa = m1;
8687 mant_hi = m2;
8689 /* If there are bits set in the low part of the mantissa, we can't
8690 represent this value. */
8691 if (mantissa != 0)
8692 return -1;
8694 /* Now make it so that mantissa contains the most-significant bits, and move
8695 the point_pos to indicate that the least-significant bits have been
8696 discarded. */
8697 point_pos -= HOST_BITS_PER_WIDE_INT;
8698 mantissa = mant_hi;
8700 /* We can permit four significant bits of mantissa only, plus a high bit
8701 which is always 1. */
8702 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8703 if ((mantissa & mask) != 0)
8704 return -1;
8706 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8707 mantissa >>= point_pos - 5;
8709 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8710 floating-point immediate zero with Neon using an integer-zero load, but
8711 that case is handled elsewhere.) */
8712 if (mantissa == 0)
8713 return -1;
8715 gcc_assert (mantissa >= 16 && mantissa <= 31);
8717 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8718 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8719 left 4 places at this point relative to normalized IEEE754 values). GCC
8720 internally uses [0.5, 1) (see real.c), so the exponent returned from
8721 REAL_EXP must be altered. */
8722 exponent = 5 - exponent;
8724 if (exponent < 0 || exponent > 7)
8725 return -1;
8727 /* Sign, mantissa and exponent are now in the correct form to plug into the
8728 formula described in the comment above. */
8729 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
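/* Worked example of the encoding above (illustrative only): 1.0 can be
   written as -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4, so the index
   computed here is

     (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70

   which is the imm8 value the assembler uses for vmov.f32 with #1.0. */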
8732 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8734 vfp3_const_double_rtx (rtx x)
8736 if (!TARGET_VFP3)
8737 return 0;
8739 return vfp3_const_double_index (x) != -1;
8742 /* Recognize immediates which can be used in various Neon instructions. Legal
8743 immediates are described by the following table (for VMVN variants, the
8744 bitwise inverse of the constant shown is recognized. In either case, VMOV
8745 is output and the correct instruction to use for a given constant is chosen
8746 by the assembler). The constant shown is replicated across all elements of
8747 the destination vector.
8749 insn elems variant constant (binary)
8750 ---- ----- ------- -----------------
8751 vmov i32 0 00000000 00000000 00000000 abcdefgh
8752 vmov i32 1 00000000 00000000 abcdefgh 00000000
8753 vmov i32 2 00000000 abcdefgh 00000000 00000000
8754 vmov i32 3 abcdefgh 00000000 00000000 00000000
8755 vmov i16 4 00000000 abcdefgh
8756 vmov i16 5 abcdefgh 00000000
8757 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8758 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8759 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8760 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8761 vmvn i16 10 00000000 abcdefgh
8762 vmvn i16 11 abcdefgh 00000000
8763 vmov i32 12 00000000 00000000 abcdefgh 11111111
8764 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8765 vmov i32 14 00000000 abcdefgh 11111111 11111111
8766 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8767 vmov i8 16 abcdefgh
8768 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8769 eeeeeeee ffffffff gggggggg hhhhhhhh
8770 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8772 For case 18, B = !b. Representable values are exactly those accepted by
8773 vfp3_const_double_index, but are output as floating-point numbers rather
8774 than indices.
8776 Variants 0-5 (inclusive) may also be used as immediates for the second
8777 operand of VORR/VBIC instructions.
8779 The INVERSE argument causes the bitwise inverse of the given operand to be
8780 recognized instead (used for recognizing legal immediates for the VAND/VORN
8781 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8782 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8783 output, rather than the real insns vbic/vorr).
8785 INVERSE makes no difference to the recognition of float vectors.
8787 The return value is the variant of immediate as shown in the above table, or
8788 -1 if the given value doesn't match any of the listed patterns.
8790 static int
8791 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8792 rtx *modconst, int *elementwidth)
8794 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8795 matches = 1; \
8796 for (i = 0; i < idx; i += (STRIDE)) \
8797 if (!(TEST)) \
8798 matches = 0; \
8799 if (matches) \
8801 immtype = (CLASS); \
8802 elsize = (ELSIZE); \
8803 break; \
8806 unsigned int i, elsize = 0, idx = 0, n_elts;
8807 unsigned int innersize;
8808 unsigned char bytes[16];
8809 int immtype = -1, matches;
8810 unsigned int invmask = inverse ? 0xff : 0;
8811 bool vector = GET_CODE (op) == CONST_VECTOR;
8813 if (vector)
8815 n_elts = CONST_VECTOR_NUNITS (op);
8816 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8818 else
8820 n_elts = 1;
8821 if (mode == VOIDmode)
8822 mode = DImode;
8823 innersize = GET_MODE_SIZE (mode);
8826 /* Vectors of float constants. */
8827 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8829 rtx el0 = CONST_VECTOR_ELT (op, 0);
8830 REAL_VALUE_TYPE r0;
8832 if (!vfp3_const_double_rtx (el0))
8833 return -1;
8835 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8837 for (i = 1; i < n_elts; i++)
8839 rtx elt = CONST_VECTOR_ELT (op, i);
8840 REAL_VALUE_TYPE re;
8842 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8844 if (!REAL_VALUES_EQUAL (r0, re))
8845 return -1;
8848 if (modconst)
8849 *modconst = CONST_VECTOR_ELT (op, 0);
8851 if (elementwidth)
8852 *elementwidth = 0;
8854 return 18;
8857 /* Splat vector constant out into a byte vector. */
8858 for (i = 0; i < n_elts; i++)
8860 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8861 unsigned HOST_WIDE_INT elpart;
8862 unsigned int part, parts;
8864 if (GET_CODE (el) == CONST_INT)
8866 elpart = INTVAL (el);
8867 parts = 1;
8869 else if (GET_CODE (el) == CONST_DOUBLE)
8871 elpart = CONST_DOUBLE_LOW (el);
8872 parts = 2;
8874 else
8875 gcc_unreachable ();
8877 for (part = 0; part < parts; part++)
8879 unsigned int byte;
8880 for (byte = 0; byte < innersize; byte++)
8882 bytes[idx++] = (elpart & 0xff) ^ invmask;
8883 elpart >>= BITS_PER_UNIT;
8885 if (GET_CODE (el) == CONST_DOUBLE)
8886 elpart = CONST_DOUBLE_HIGH (el);
8890 /* Sanity check. */
8891 gcc_assert (idx == GET_MODE_SIZE (mode));
8895 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8896 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8898 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8899 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8901 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8902 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8904 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8905 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8907 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8909 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8911 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8912 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8914 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8915 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8917 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8918 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8920 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8921 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8923 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8925 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8927 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8928 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8930 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8931 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8933 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8934 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8936 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8937 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8939 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8941 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8942 && bytes[i] == bytes[(i + 8) % idx]);
8944 while (0);
8946 if (immtype == -1)
8947 return -1;
8949 if (elementwidth)
8950 *elementwidth = elsize;
8952 if (modconst)
8954 unsigned HOST_WIDE_INT imm = 0;
8956 /* Un-invert bytes of recognized vector, if necessary. */
8957 if (invmask != 0)
8958 for (i = 0; i < idx; i++)
8959 bytes[i] ^= invmask;
8961 if (immtype == 17)
8963 /* FIXME: Broken on 32-bit H_W_I hosts. */
8964 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8966 for (i = 0; i < 8; i++)
8967 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8968 << (i * BITS_PER_UNIT);
8970 *modconst = GEN_INT (imm);
8972 else
8974 unsigned HOST_WIDE_INT imm = 0;
8976 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8977 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8979 *modconst = GEN_INT (imm);
8983 return immtype;
8984 #undef CHECK
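/* Illustrative example (a sketch of the checks above): a V4SImode
   constant whose elements are all 0x0000AB00 splats to the per-element
   byte pattern 00 AB 00 00, which fails the variant-0 test (byte 1 is
   non-zero) but matches variant 1 ("00000000 00000000 abcdefgh
   00000000"), so neon_valid_immediate returns 1 with *ELEMENTWIDTH set
   to 32 and *MODCONST set to (const_int 0xAB00). */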
8987 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8988 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8989 float elements), and a modified constant (whatever should be output for a
8990 VMOV) in *MODCONST. */
8993 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8994 rtx *modconst, int *elementwidth)
8996 rtx tmpconst;
8997 int tmpwidth;
8998 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9000 if (retval == -1)
9001 return 0;
9003 if (modconst)
9004 *modconst = tmpconst;
9006 if (elementwidth)
9007 *elementwidth = tmpwidth;
9009 return 1;
9012 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9013 the immediate is valid, write a constant suitable for using as an operand
9014 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9015 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9018 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9019 rtx *modconst, int *elementwidth)
9021 rtx tmpconst;
9022 int tmpwidth;
9023 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9025 if (retval < 0 || retval > 5)
9026 return 0;
9028 if (modconst)
9029 *modconst = tmpconst;
9031 if (elementwidth)
9032 *elementwidth = tmpwidth;
9034 return 1;
9037 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9038 the immediate is valid, write a constant suitable for using as an operand
9039 to VSHR/VSHL to *MODCONST and the corresponding element width to
9040 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
9041 which have different limitations. */
9044 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9045 rtx *modconst, int *elementwidth,
9046 bool isleftshift)
9048 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9049 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9050 unsigned HOST_WIDE_INT last_elt = 0;
9051 unsigned HOST_WIDE_INT maxshift;
9053 /* Split vector constant out into a byte vector. */
9054 for (i = 0; i < n_elts; i++)
9056 rtx el = CONST_VECTOR_ELT (op, i);
9057 unsigned HOST_WIDE_INT elpart;
9059 if (GET_CODE (el) == CONST_INT)
9060 elpart = INTVAL (el);
9061 else if (GET_CODE (el) == CONST_DOUBLE)
9062 return 0;
9063 else
9064 gcc_unreachable ();
9066 if (i != 0 && elpart != last_elt)
9067 return 0;
9069 last_elt = elpart;
9072 /* Shift less than element size. */
9073 maxshift = innersize * 8;
9075 if (isleftshift)
9077 /* Left shift immediate value can be from 0 to <size>-1. */
9078 if (last_elt >= maxshift)
9079 return 0;
9081 else
9083 /* Right shift immediate value can be from 1 to <size>. */
9084 if (last_elt == 0 || last_elt > maxshift)
9085 return 0;
9088 if (elementwidth)
9089 *elementwidth = innersize * 8;
9091 if (modconst)
9092 *modconst = CONST_VECTOR_ELT (op, 0);
9094 return 1;
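/* Illustrative example of the limits above: for a V4HImode shift-count
   vector with every element equal to 16, the element size is 16 bits,
   so the vector is a valid immediate for VSHR (right shifts may be
   1..16) but not for VSHL (left shifts may only be 0..15). */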
9097 /* Return a string suitable for output of Neon immediate logic operation
9098 MNEM. */
9100 char *
9101 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9102 int inverse, int quad)
9104 int width, is_valid;
9105 static char templ[40];
9107 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9109 gcc_assert (is_valid != 0);
9111 if (quad)
9112 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9113 else
9114 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9116 return templ;
9119 /* Return a string suitable for output of Neon immediate shift operation
9120 (VSHR or VSHL) MNEM. */
9122 char *
9123 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9124 enum machine_mode mode, int quad,
9125 bool isleftshift)
9127 int width, is_valid;
9128 static char templ[40];
9130 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9131 gcc_assert (is_valid != 0);
9133 if (quad)
9134 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9135 else
9136 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9138 return templ;
9141 /* Output a sequence of pairwise operations to implement a reduction.
9142 NOTE: We do "too much work" here, because pairwise operations work on two
9143 registers-worth of operands in one go. Unfortunately I don't think we can
9144 exploit those extra calculations to do the full operation in fewer steps.
9145 Although all vector elements of the result but the first are ignored, we
9146 actually calculate the same result in each of the elements. An alternative
9147 such as initially loading a vector with zero to use as each of the second
9148 operands would use up an additional register and take an extra instruction,
9149 for no particular gain. */
9151 void
9152 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9153 rtx (*reduc) (rtx, rtx, rtx))
9155 enum machine_mode inner = GET_MODE_INNER (mode);
9156 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9157 rtx tmpsum = op1;
9159 for (i = parts / 2; i >= 1; i /= 2)
9161 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9162 emit_insn (reduc (dest, tmpsum, tmpsum));
9163 tmpsum = dest;
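/* For example (illustrative), reducing a V4SImode vector with a
   vpadd-style builder passed as REDUC takes two steps here: PARTS is 4,
   so the loop runs for i == 2 and i == 1, and the final pairwise
   operation writes its result directly into OP0. */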
9167 /* If VALS is a vector constant that can be loaded into a register
9168 using VDUP, generate instructions to do so and return an RTX to
9169 assign to the register. Otherwise return NULL_RTX. */
9171 static rtx
9172 neon_vdup_constant (rtx vals)
9174 enum machine_mode mode = GET_MODE (vals);
9175 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9176 int n_elts = GET_MODE_NUNITS (mode);
9177 bool all_same = true;
9178 rtx x;
9179 int i;
9181 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9182 return NULL_RTX;
9184 for (i = 0; i < n_elts; ++i)
9186 x = XVECEXP (vals, 0, i);
9187 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9188 all_same = false;
9191 if (!all_same)
9192 /* The elements are not all the same. We could handle repeating
9193 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9194 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9195 vdup.i16). */
9196 return NULL_RTX;
9198 /* We can load this constant by using VDUP and a constant in a
9199 single ARM register. This will be cheaper than a vector
9200 load. */
9202 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9203 return gen_rtx_VEC_DUPLICATE (mode, x);
9206 /* Generate code to load VALS, which is a PARALLEL containing only
9207 constants (for vec_init) or CONST_VECTOR, efficiently into a
9208 register. Returns an RTX to copy into the register, or NULL_RTX
9209 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9212 neon_make_constant (rtx vals)
9214 enum machine_mode mode = GET_MODE (vals);
9215 rtx target;
9216 rtx const_vec = NULL_RTX;
9217 int n_elts = GET_MODE_NUNITS (mode);
9218 int n_const = 0;
9219 int i;
9221 if (GET_CODE (vals) == CONST_VECTOR)
9222 const_vec = vals;
9223 else if (GET_CODE (vals) == PARALLEL)
9225 /* A CONST_VECTOR must contain only CONST_INTs and
9226 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9227 Only store valid constants in a CONST_VECTOR. */
9228 for (i = 0; i < n_elts; ++i)
9230 rtx x = XVECEXP (vals, 0, i);
9231 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9232 n_const++;
9234 if (n_const == n_elts)
9235 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9237 else
9238 gcc_unreachable ();
9240 if (const_vec != NULL
9241 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9242 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9243 return const_vec;
9244 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9245 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9246 pipeline cycle; creating the constant takes one or two ARM
9247 pipeline cycles. */
9248 return target;
9249 else if (const_vec != NULL_RTX)
9250 /* Load from constant pool. On Cortex-A8 this takes two cycles
9251 (for either double or quad vectors). We can not take advantage
9252 of single-cycle VLD1 because we need a PC-relative addressing
9253 mode. */
9254 return const_vec;
9255 else
9256 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9257 We can not construct an initializer. */
9258 return NULL_RTX;
9261 /* Initialize vector TARGET to VALS. */
9263 void
9264 neon_expand_vector_init (rtx target, rtx vals)
9266 enum machine_mode mode = GET_MODE (target);
9267 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9268 int n_elts = GET_MODE_NUNITS (mode);
9269 int n_var = 0, one_var = -1;
9270 bool all_same = true;
9271 rtx x, mem;
9272 int i;
9274 for (i = 0; i < n_elts; ++i)
9276 x = XVECEXP (vals, 0, i);
9277 if (!CONSTANT_P (x))
9278 ++n_var, one_var = i;
9280 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9281 all_same = false;
9284 if (n_var == 0)
9286 rtx constant = neon_make_constant (vals);
9287 if (constant != NULL_RTX)
9289 emit_move_insn (target, constant);
9290 return;
9294 /* Splat a single non-constant element if we can. */
9295 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9297 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9298 emit_insn (gen_rtx_SET (VOIDmode, target,
9299 gen_rtx_VEC_DUPLICATE (mode, x)));
9300 return;
9303 /* One field is non-constant. Load constant then overwrite varying
9304 field. This is more efficient than using the stack. */
9305 if (n_var == 1)
9307 rtx copy = copy_rtx (vals);
9308 rtx index = GEN_INT (one_var);
9310 /* Load constant part of vector, substitute neighboring value for
9311 varying element. */
9312 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9313 neon_expand_vector_init (target, copy);
9315 /* Insert variable. */
9316 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9317 switch (mode)
9319 case V8QImode:
9320 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9321 break;
9322 case V16QImode:
9323 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9324 break;
9325 case V4HImode:
9326 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9327 break;
9328 case V8HImode:
9329 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9330 break;
9331 case V2SImode:
9332 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9333 break;
9334 case V4SImode:
9335 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9336 break;
9337 case V2SFmode:
9338 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9339 break;
9340 case V4SFmode:
9341 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9342 break;
9343 case V2DImode:
9344 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9345 break;
9346 default:
9347 gcc_unreachable ();
9349 return;
9352 /* Construct the vector in memory one field at a time
9353 and load the whole vector. */
9354 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9355 for (i = 0; i < n_elts; i++)
9356 emit_move_insn (adjust_address_nv (mem, inner_mode,
9357 i * GET_MODE_SIZE (inner_mode)),
9358 XVECEXP (vals, 0, i));
9359 emit_move_insn (target, mem);
9362 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9363 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9364 reported source locations are bogus. */
9366 static void
9367 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9368 const char *err)
9370 HOST_WIDE_INT lane;
9372 gcc_assert (GET_CODE (operand) == CONST_INT);
9374 lane = INTVAL (operand);
9376 if (lane < low || lane >= high)
9377 error (err);
9380 /* Bounds-check lanes. */
9382 void
9383 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9385 bounds_check (operand, low, high, "lane out of range");
9388 /* Bounds-check constants. */
9390 void
9391 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9393 bounds_check (operand, low, high, "constant out of range");
9396 HOST_WIDE_INT
9397 neon_element_bits (enum machine_mode mode)
9399 if (mode == DImode)
9400 return GET_MODE_BITSIZE (mode);
9401 else
9402 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9406 /* Predicates for `match_operand' and `match_operator'. */
9408 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9409 WB is true if full writeback address modes are allowed and is false
9410 if limited writeback address modes (POST_INC and PRE_DEC) are
9411 allowed. */
9414 arm_coproc_mem_operand (rtx op, bool wb)
9416 rtx ind;
9418 /* Reject eliminable registers. */
9419 if (! (reload_in_progress || reload_completed)
9420 && ( reg_mentioned_p (frame_pointer_rtx, op)
9421 || reg_mentioned_p (arg_pointer_rtx, op)
9422 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9423 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9424 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9425 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9426 return FALSE;
9428 /* Constants are converted into offsets from labels. */
9429 if (GET_CODE (op) != MEM)
9430 return FALSE;
9432 ind = XEXP (op, 0);
9434 if (reload_completed
9435 && (GET_CODE (ind) == LABEL_REF
9436 || (GET_CODE (ind) == CONST
9437 && GET_CODE (XEXP (ind, 0)) == PLUS
9438 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9439 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9440 return TRUE;
9442 /* Match: (mem (reg)). */
9443 if (GET_CODE (ind) == REG)
9444 return arm_address_register_rtx_p (ind, 0);
9446 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9447 acceptable in any case (subject to verification by
9448 arm_address_register_rtx_p). We need WB to be true to accept
9449 PRE_INC and POST_DEC. */
9450 if (GET_CODE (ind) == POST_INC
9451 || GET_CODE (ind) == PRE_DEC
9452 || (wb
9453 && (GET_CODE (ind) == PRE_INC
9454 || GET_CODE (ind) == POST_DEC)))
9455 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9457 if (wb
9458 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9459 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9460 && GET_CODE (XEXP (ind, 1)) == PLUS
9461 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9462 ind = XEXP (ind, 1);
9464 /* Match:
9465 (plus (reg)
9466 (const)). */
9467 if (GET_CODE (ind) == PLUS
9468 && GET_CODE (XEXP (ind, 0)) == REG
9469 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9470 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9471 && INTVAL (XEXP (ind, 1)) > -1024
9472 && INTVAL (XEXP (ind, 1)) < 1024
9473 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9474 return TRUE;
9476 return FALSE;
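/* Illustrative examples of the offset test above: (mem (plus (reg r3)
   (const_int -1020))) and an offset of 1020 are accepted, whereas an
   offset of 2 is rejected because it is not a multiple of 4, and 1024
   is rejected because it is out of range. */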
9479 /* Return TRUE if OP is a memory operand which we can load or store a vector
9480 to/from. TYPE is one of the following values:
9481 0 - Vector load/store (vldr)
9482 1 - Core registers (ldm)
9483 2 - Element/structure loads (vld1)
9486 neon_vector_mem_operand (rtx op, int type)
9488 rtx ind;
9490 /* Reject eliminable registers. */
9491 if (! (reload_in_progress || reload_completed)
9492 && ( reg_mentioned_p (frame_pointer_rtx, op)
9493 || reg_mentioned_p (arg_pointer_rtx, op)
9494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9498 return FALSE;
9500 /* Constants are converted into offsets from labels. */
9501 if (GET_CODE (op) != MEM)
9502 return FALSE;
9504 ind = XEXP (op, 0);
9506 if (reload_completed
9507 && (GET_CODE (ind) == LABEL_REF
9508 || (GET_CODE (ind) == CONST
9509 && GET_CODE (XEXP (ind, 0)) == PLUS
9510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9511 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9512 return TRUE;
9514 /* Match: (mem (reg)). */
9515 if (GET_CODE (ind) == REG)
9516 return arm_address_register_rtx_p (ind, 0);
9518 /* Allow post-increment with Neon registers. */
9519 if ((type != 1 && GET_CODE (ind) == POST_INC)
9520 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9521 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9523 /* FIXME: vld1 allows register post-modify. */
9525 /* Match:
9526 (plus (reg)
9527 (const)). */
9528 if (type == 0
9529 && GET_CODE (ind) == PLUS
9530 && GET_CODE (XEXP (ind, 0)) == REG
9531 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9532 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9533 && INTVAL (XEXP (ind, 1)) > -1024
9534 && INTVAL (XEXP (ind, 1)) < 1016
9535 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9536 return TRUE;
9538 return FALSE;
9541 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9542 type. */
9544 neon_struct_mem_operand (rtx op)
9546 rtx ind;
9548 /* Reject eliminable registers. */
9549 if (! (reload_in_progress || reload_completed)
9550 && ( reg_mentioned_p (frame_pointer_rtx, op)
9551 || reg_mentioned_p (arg_pointer_rtx, op)
9552 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9553 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9554 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9555 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9556 return FALSE;
9558 /* Constants are converted into offsets from labels. */
9559 if (GET_CODE (op) != MEM)
9560 return FALSE;
9562 ind = XEXP (op, 0);
9564 if (reload_completed
9565 && (GET_CODE (ind) == LABEL_REF
9566 || (GET_CODE (ind) == CONST
9567 && GET_CODE (XEXP (ind, 0)) == PLUS
9568 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9569 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9570 return TRUE;
9572 /* Match: (mem (reg)). */
9573 if (GET_CODE (ind) == REG)
9574 return arm_address_register_rtx_p (ind, 0);
9576 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9577 if (GET_CODE (ind) == POST_INC
9578 || GET_CODE (ind) == PRE_DEC)
9579 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9581 return FALSE;
9584 /* Return true if X is a register that will be eliminated later on. */
9586 arm_eliminable_register (rtx x)
9588 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9589 || REGNO (x) == ARG_POINTER_REGNUM
9590 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9591 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9594 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9595 coprocessor registers. Otherwise return NO_REGS. */
9597 enum reg_class
9598 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9600 if (mode == HFmode)
9602 if (!TARGET_NEON_FP16)
9603 return GENERAL_REGS;
9604 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9605 return NO_REGS;
9606 return GENERAL_REGS;
9609 /* The neon move patterns handle all legitimate vector and struct
9610 addresses. */
9611 if (TARGET_NEON
9612 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9613 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9614 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9615 || VALID_NEON_STRUCT_MODE (mode)))
9616 return NO_REGS;
9618 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9619 return NO_REGS;
9621 return GENERAL_REGS;
9624 /* Values which must be returned in the most-significant end of the return
9625 register. */
9627 static bool
9628 arm_return_in_msb (const_tree valtype)
9630 return (TARGET_AAPCS_BASED
9631 && BYTES_BIG_ENDIAN
9632 && (AGGREGATE_TYPE_P (valtype)
9633 || TREE_CODE (valtype) == COMPLEX_TYPE
9634 || FIXED_POINT_TYPE_P (valtype)));
9637 /* Return TRUE if X references a SYMBOL_REF. */
9639 symbol_mentioned_p (rtx x)
9641 const char * fmt;
9642 int i;
9644 if (GET_CODE (x) == SYMBOL_REF)
9645 return 1;
9647 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9648 are constant offsets, not symbols. */
9649 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9650 return 0;
9652 fmt = GET_RTX_FORMAT (GET_CODE (x));
9654 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9656 if (fmt[i] == 'E')
9658 int j;
9660 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9661 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9662 return 1;
9664 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9665 return 1;
9668 return 0;
9671 /* Return TRUE if X references a LABEL_REF. */
9673 label_mentioned_p (rtx x)
9675 const char * fmt;
9676 int i;
9678 if (GET_CODE (x) == LABEL_REF)
9679 return 1;
9681 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9682 instruction, but they are constant offsets, not symbols. */
9683 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9684 return 0;
9686 fmt = GET_RTX_FORMAT (GET_CODE (x));
9687 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9689 if (fmt[i] == 'E')
9691 int j;
9693 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9694 if (label_mentioned_p (XVECEXP (x, i, j)))
9695 return 1;
9697 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9698 return 1;
9701 return 0;
9705 tls_mentioned_p (rtx x)
9707 switch (GET_CODE (x))
9709 case CONST:
9710 return tls_mentioned_p (XEXP (x, 0));
9712 case UNSPEC:
9713 if (XINT (x, 1) == UNSPEC_TLS)
9714 return 1;
9716 default:
9717 return 0;
9721 /* Must not copy any rtx that uses a pc-relative address. */
9723 static int
9724 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9726 if (GET_CODE (*x) == UNSPEC
9727 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9728 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9729 return 1;
9730 return 0;
9733 static bool
9734 arm_cannot_copy_insn_p (rtx insn)
9736 /* The tls call insn cannot be copied, as it is paired with a data
9737 word. */
9738 if (recog_memoized (insn) == CODE_FOR_tlscall)
9739 return true;
9741 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9744 enum rtx_code
9745 minmax_code (rtx x)
9747 enum rtx_code code = GET_CODE (x);
9749 switch (code)
9751 case SMAX:
9752 return GE;
9753 case SMIN:
9754 return LE;
9755 case UMIN:
9756 return LEU;
9757 case UMAX:
9758 return GEU;
9759 default:
9760 gcc_unreachable ();
9764 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9766 bool
9767 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9768 int *mask, bool *signed_sat)
9770 /* The high bound must be a power of two minus one. */
9771 int log = exact_log2 (INTVAL (hi_bound) + 1);
9772 if (log == -1)
9773 return false;
9775 /* The low bound is either zero (for usat) or one less than the
9776 negation of the high bound (for ssat). */
9777 if (INTVAL (lo_bound) == 0)
9779 if (mask)
9780 *mask = log;
9781 if (signed_sat)
9782 *signed_sat = false;
9784 return true;
9787 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9789 if (mask)
9790 *mask = log + 1;
9791 if (signed_sat)
9792 *signed_sat = true;
9794 return true;
9797 return false;
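/* Worked example (illustrative): for the saturation bounds hi_bound = 255,
   lo_bound = 0 we get exact_log2 (256) = 8, so the pair matches usat with
   *mask = 8; for hi_bound = 127, lo_bound = -128 we get log = 7 and the
   pair matches ssat with *mask = 8 (log + 1) and *signed_sat = true.  */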
9800 /* Return 1 if memory locations are adjacent. */
9802 adjacent_mem_locations (rtx a, rtx b)
9804 /* We don't guarantee to preserve the order of these memory refs. */
9805 if (volatile_refs_p (a) || volatile_refs_p (b))
9806 return 0;
9808 if ((GET_CODE (XEXP (a, 0)) == REG
9809 || (GET_CODE (XEXP (a, 0)) == PLUS
9810 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9811 && (GET_CODE (XEXP (b, 0)) == REG
9812 || (GET_CODE (XEXP (b, 0)) == PLUS
9813 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9815 HOST_WIDE_INT val0 = 0, val1 = 0;
9816 rtx reg0, reg1;
9817 int val_diff;
9819 if (GET_CODE (XEXP (a, 0)) == PLUS)
9821 reg0 = XEXP (XEXP (a, 0), 0);
9822 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9824 else
9825 reg0 = XEXP (a, 0);
9827 if (GET_CODE (XEXP (b, 0)) == PLUS)
9829 reg1 = XEXP (XEXP (b, 0), 0);
9830 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9832 else
9833 reg1 = XEXP (b, 0);
9835 /* Don't accept any offset that will require multiple
9836 instructions to handle, since this would cause the
9837 arith_adjacentmem pattern to output an overlong sequence. */
9838 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9839 return 0;
9841 /* Don't allow an eliminable register: register elimination can make
9842 the offset too large. */
9843 if (arm_eliminable_register (reg0))
9844 return 0;
9846 val_diff = val1 - val0;
9848 if (arm_ld_sched)
9850 /* If the target has load delay slots, then there's no benefit
9851 to using an ldm instruction unless the offset is zero and
9852 we are optimizing for size. */
9853 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9854 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9855 && (val_diff == 4 || val_diff == -4));
9858 return ((REGNO (reg0) == REGNO (reg1))
9859 && (val_diff == 4 || val_diff == -4));
9862 return 0;
9865 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9866 for load operations, false for store operations. CONSECUTIVE is true
9867 if the register numbers in the operation must be consecutive in the register
9868 bank. RETURN_PC is true if the value is to be loaded into the PC.
9869 The pattern we are trying to match for load is:
9870 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9871 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9874 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9876 where
9877 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
9878 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
9879 3. If consecutive is TRUE, then for kth register being loaded,
9880 REGNO (R_dk) = REGNO (R_d0) + k.
9881 The pattern for store is similar. */
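/* For illustration (not part of the original comment): the parallel for an
   "ldmia r0, {r4, r5}" without write-back would be

     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]

   which satisfies the constraints above with a zero offset and a
   <reg_increment> of 4 for SImode.  */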
9882 bool
9883 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
9884 bool consecutive, bool return_pc)
9886 HOST_WIDE_INT count = XVECLEN (op, 0);
9887 rtx reg, mem, addr;
9888 unsigned regno;
9889 unsigned first_regno;
9890 HOST_WIDE_INT i = 1, base = 0, offset = 0;
9891 rtx elt;
9892 bool addr_reg_in_reglist = false;
9893 bool update = false;
9894 int reg_increment;
9895 int offset_adj;
9896 int regs_per_val;
9898 /* If not in SImode, then registers must be consecutive
9899 (e.g., VLDM instructions for DFmode). */
9900 gcc_assert ((mode == SImode) || consecutive);
9901 /* Setting return_pc for stores is illegal. */
9902 gcc_assert (!return_pc || load);
9904 /* Set up the increments and the regs per val based on the mode. */
9905 reg_increment = GET_MODE_SIZE (mode);
9906 regs_per_val = reg_increment / 4;
9907 offset_adj = return_pc ? 1 : 0;
9909 if (count <= 1
9910 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
9911 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
9912 return false;
9914 /* Check if this is a write-back. */
9915 elt = XVECEXP (op, 0, offset_adj);
9916 if (GET_CODE (SET_SRC (elt)) == PLUS)
9918 i++;
9919 base = 1;
9920 update = true;
9922 /* The offset adjustment must be the number of registers being
9923 popped times the size of a single register. */
9924 if (!REG_P (SET_DEST (elt))
9925 || !REG_P (XEXP (SET_SRC (elt), 0))
9926 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
9927 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
9928 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
9929 ((count - 1 - offset_adj) * reg_increment))
9930 return false;
9933 i = i + offset_adj;
9934 base = base + offset_adj;
9935 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
9936 success depends on the type: VLDM can do just one reg,
9937 LDM must do at least two. */
9938 if ((count <= i) && (mode == SImode))
9939 return false;
9941 elt = XVECEXP (op, 0, i - 1);
9942 if (GET_CODE (elt) != SET)
9943 return false;
9945 if (load)
9947 reg = SET_DEST (elt);
9948 mem = SET_SRC (elt);
9950 else
9952 reg = SET_SRC (elt);
9953 mem = SET_DEST (elt);
9956 if (!REG_P (reg) || !MEM_P (mem))
9957 return false;
9959 regno = REGNO (reg);
9960 first_regno = regno;
9961 addr = XEXP (mem, 0);
9962 if (GET_CODE (addr) == PLUS)
9964 if (!CONST_INT_P (XEXP (addr, 1)))
9965 return false;
9967 offset = INTVAL (XEXP (addr, 1));
9968 addr = XEXP (addr, 0);
9971 if (!REG_P (addr))
9972 return false;
9974 /* Don't allow SP to be loaded unless it is also the base register. This
9975 guarantees that SP is reset correctly when an LDM instruction
9976 is interrupted. Otherwise, we might end up with a corrupt stack. */
9977 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
9978 return false;
9980 for (; i < count; i++)
9982 elt = XVECEXP (op, 0, i);
9983 if (GET_CODE (elt) != SET)
9984 return false;
9986 if (load)
9988 reg = SET_DEST (elt);
9989 mem = SET_SRC (elt);
9991 else
9993 reg = SET_SRC (elt);
9994 mem = SET_DEST (elt);
9997 if (!REG_P (reg)
9998 || GET_MODE (reg) != mode
9999 || REGNO (reg) <= regno
10000 || (consecutive
10001 && (REGNO (reg) !=
10002 (unsigned int) (first_regno + regs_per_val * (i - base))))
10003 /* Don't allow SP to be loaded unless it is also the base register. This
10004 guarantees that SP is reset correctly when an LDM instruction
10005 is interrupted. Otherwise, we might end up with a corrupt stack. */
10006 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10007 || !MEM_P (mem)
10008 || GET_MODE (mem) != mode
10009 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10010 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10011 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10012 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10013 offset + (i - base) * reg_increment))
10014 && (!REG_P (XEXP (mem, 0))
10015 || offset + (i - base) * reg_increment != 0)))
10016 return false;
10018 regno = REGNO (reg);
10019 if (regno == REGNO (addr))
10020 addr_reg_in_reglist = true;
10023 if (load)
10025 if (update && addr_reg_in_reglist)
10026 return false;
10028 /* For Thumb-1, the address register is always modified, either by write-back
10029 or by an explicit load. If the pattern does not describe an update,
10030 then the address register must be in the list of loaded registers. */
10031 if (TARGET_THUMB1)
10032 return update || addr_reg_in_reglist;
10035 return true;
10038 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10039 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10040 instruction. ADD_OFFSET is nonzero if the base address register needs
10041 to be modified with an add instruction before we can use it. */
10043 static bool
10044 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10045 int nops, HOST_WIDE_INT add_offset)
10047 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10048 if the offset isn't small enough. The reason 2 ldrs are faster
10049 is because these ARMs are able to do more than one cache access
10050 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10051 whilst the ARM8 has a double bandwidth cache. This means that
10052 these cores can do both an instruction fetch and a data fetch in
10053 a single cycle, so the trick of calculating the address into a
10054 scratch register (one of the result regs) and then doing a load
10055 multiple actually becomes slower (and no smaller in code size).
10056 That is the transformation
10058 ldr rd1, [rbase + offset]
10059 ldr rd2, [rbase + offset + 4]
10063 add rd1, rbase, offset
10064 ldmia rd1, {rd1, rd2}
10066 produces worse code -- '3 cycles + any stalls on rd2' instead of
10067 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10068 access per cycle, the first sequence could never complete in less
10069 than 6 cycles, whereas the ldm sequence would only take 5 and
10070 would make better use of sequential accesses if not hitting the
10071 cache.
10073 We cheat here and test 'arm_ld_sched' which we currently know to
10074 only be true for the ARM8, ARM9 and StrongARM. If this ever
10075 changes, then the test below needs to be reworked. */
10076 if (nops == 2 && arm_ld_sched && add_offset != 0)
10077 return false;
10079 /* XScale has load-store double instructions, but they have stricter
10080 alignment requirements than load-store multiple, so we cannot
10081 use them.
10083 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10084 the pipeline until completion.
10086 NREGS CYCLES
10087 1 3
10088 2 4
10089 3 5
10090 4 6
10092 An ldr instruction takes 1-3 cycles, but does not block the
10093 pipeline.
10095 NREGS CYCLES
10096 1 1-3
10097 2 2-6
10098 3 3-9
10099 4 4-12
10101 Best case ldr will always win. However, the more ldr instructions
10102 we issue, the less likely we are to be able to schedule them well.
10103 Using ldr instructions also increases code size.
10105 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10106 for counts of 3 or 4 regs. */
10107 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10108 return false;
10109 return true;
10112 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10113 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10114 an array ORDER describing the sequence in which to access the
10115 offsets so that they are visited in ascending order. In this sequence, each
10116 offset must be larger by exactly 4 than the previous one. ORDER[0]
10117 must have been filled in with the lowest offset by the caller.
10118 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10119 we use to verify that ORDER produces an ascending order of registers.
10120 Return true if it was possible to construct such an order, false if
10121 not. */
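/* Worked example (illustrative): with NOPS = 4 and UNSORTED_OFFSETS =
   {8, 0, 4, 12}, the caller sets ORDER[0] = 1 (offset 0); the loop then
   finds offsets 4, 8 and 12 in turn, producing ORDER = {1, 2, 0, 3}.
   If two entries shared an offset, or a gap other than 4 appeared, the
   function would return false.  */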
10123 static bool
10124 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10125 int *unsorted_regs)
10127 int i;
10128 for (i = 1; i < nops; i++)
10130 int j;
10132 order[i] = order[i - 1];
10133 for (j = 0; j < nops; j++)
10134 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10136 /* We must find exactly one offset that is higher than the
10137 previous one by 4. */
10138 if (order[i] != order[i - 1])
10139 return false;
10140 order[i] = j;
10142 if (order[i] == order[i - 1])
10143 return false;
10144 /* The register numbers must be ascending. */
10145 if (unsorted_regs != NULL
10146 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10147 return false;
10149 return true;
10152 /* Used to determine in a peephole whether a sequence of load
10153 instructions can be changed into a load-multiple instruction.
10154 NOPS is the number of separate load instructions we are examining. The
10155 first NOPS entries in OPERANDS are the destination registers, the
10156 next NOPS entries are memory operands. If this function is
10157 successful, *BASE is set to the common base register of the memory
10158 accesses; *LOAD_OFFSET is set to the first memory location's offset
10159 from that base register.
10160 REGS is an array filled in with the destination register numbers.
10161 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10162 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10163 the sequence of registers in REGS matches the loads from ascending memory
10164 locations, and the function verifies that the register numbers are
10165 themselves ascending. If CHECK_REGS is false, the register numbers
10166 are stored in the order they are found in the operands. */
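/* Illustrative example (assumed, not from the original sources): a peephole
   window containing

     ldr r4, [r0]
     ldr r5, [r0, #4]
     ldr r6, [r0, #8]

   yields base_reg = r0, offsets {0, 4, 8} and regs {4, 5, 6}, and the
   function returns 1 (the ldmia case), allowing the three loads to be
   rewritten as "ldmia r0, {r4, r5, r6}".  */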
10167 static int
10168 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10169 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10171 int unsorted_regs[MAX_LDM_STM_OPS];
10172 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10173 int order[MAX_LDM_STM_OPS];
10174 rtx base_reg_rtx = NULL;
10175 int base_reg = -1;
10176 int i, ldm_case;
10178 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10179 easily extended if required. */
10180 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10182 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10184 /* Loop over the operands and check that the memory references are
10185 suitable (i.e. immediate offsets from the same base register). At
10186 the same time, extract the target register, and the memory
10187 offsets. */
10188 for (i = 0; i < nops; i++)
10190 rtx reg;
10191 rtx offset;
10193 /* Convert a subreg of a mem into the mem itself. */
10194 if (GET_CODE (operands[nops + i]) == SUBREG)
10195 operands[nops + i] = alter_subreg (operands + (nops + i));
10197 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10199 /* Don't reorder volatile memory references; it doesn't seem worth
10200 looking for the case where the order is ok anyway. */
10201 if (MEM_VOLATILE_P (operands[nops + i]))
10202 return 0;
10204 offset = const0_rtx;
10206 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10207 || (GET_CODE (reg) == SUBREG
10208 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10209 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10210 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10211 == REG)
10212 || (GET_CODE (reg) == SUBREG
10213 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10214 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10215 == CONST_INT)))
10217 if (i == 0)
10219 base_reg = REGNO (reg);
10220 base_reg_rtx = reg;
10221 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10222 return 0;
10224 else if (base_reg != (int) REGNO (reg))
10225 /* Not addressed from the same base register. */
10226 return 0;
10228 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10229 ? REGNO (operands[i])
10230 : REGNO (SUBREG_REG (operands[i])));
10232 /* If it isn't an integer register, or if it overwrites the
10233 base register but isn't the last insn in the list, then
10234 we can't do this. */
10235 if (unsorted_regs[i] < 0
10236 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10237 || unsorted_regs[i] > 14
10238 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10239 return 0;
10241 unsorted_offsets[i] = INTVAL (offset);
10242 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10243 order[0] = i;
10245 else
10246 /* Not a suitable memory address. */
10247 return 0;
10250 /* All the useful information has now been extracted from the
10251 operands into unsorted_regs and unsorted_offsets; additionally,
10252 order[0] has been set to the lowest offset in the list. Sort
10253 the offsets into order, verifying that they are adjacent, and
10254 check that the register numbers are ascending. */
10255 if (!compute_offset_order (nops, unsorted_offsets, order,
10256 check_regs ? unsorted_regs : NULL))
10257 return 0;
10259 if (saved_order)
10260 memcpy (saved_order, order, sizeof order);
10262 if (base)
10264 *base = base_reg;
10266 for (i = 0; i < nops; i++)
10267 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10269 *load_offset = unsorted_offsets[order[0]];
10272 if (TARGET_THUMB1
10273 && !peep2_reg_dead_p (nops, base_reg_rtx))
10274 return 0;
10276 if (unsorted_offsets[order[0]] == 0)
10277 ldm_case = 1; /* ldmia */
10278 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10279 ldm_case = 2; /* ldmib */
10280 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10281 ldm_case = 3; /* ldmda */
10282 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10283 ldm_case = 4; /* ldmdb */
10284 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10285 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10286 ldm_case = 5;
10287 else
10288 return 0;
10290 if (!multiple_operation_profitable_p (false, nops,
10291 ldm_case == 5
10292 ? unsorted_offsets[order[0]] : 0))
10293 return 0;
10295 return ldm_case;
10298 /* Used to determine in a peephole whether a sequence of store instructions can
10299 be changed into a store-multiple instruction.
10300 NOPS is the number of separate store instructions we are examining.
10301 NOPS_TOTAL is the total number of instructions recognized by the peephole
10302 pattern.
10303 The first NOPS entries in OPERANDS are the source registers, the next
10304 NOPS entries are memory operands. If this function is successful, *BASE is
10305 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10306 to the first memory location's offset from that base register. REGS is an
10307 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10308 likewise filled with the corresponding rtx's.
10309 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10310 numbers to an ascending order of stores.
10311 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10312 from ascending memory locations, and the function verifies that the register
10313 numbers are themselves ascending. If CHECK_REGS is false, the register
10314 numbers are stored in the order they are found in the operands. */
10315 static int
10316 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10317 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10318 HOST_WIDE_INT *load_offset, bool check_regs)
10320 int unsorted_regs[MAX_LDM_STM_OPS];
10321 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10322 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10323 int order[MAX_LDM_STM_OPS];
10324 int base_reg = -1;
10325 rtx base_reg_rtx = NULL;
10326 int i, stm_case;
10328 /* Write-back of the base register is currently only supported for Thumb-1. */
10329 int base_writeback = TARGET_THUMB1;
10331 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10332 easily extended if required. */
10333 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10335 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10337 /* Loop over the operands and check that the memory references are
10338 suitable (i.e. immediate offsets from the same base register). At
10339 the same time, extract the target register, and the memory
10340 offsets. */
10341 for (i = 0; i < nops; i++)
10343 rtx reg;
10344 rtx offset;
10346 /* Convert a subreg of a mem into the mem itself. */
10347 if (GET_CODE (operands[nops + i]) == SUBREG)
10348 operands[nops + i] = alter_subreg (operands + (nops + i));
10350 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10352 /* Don't reorder volatile memory references; it doesn't seem worth
10353 looking for the case where the order is ok anyway. */
10354 if (MEM_VOLATILE_P (operands[nops + i]))
10355 return 0;
10357 offset = const0_rtx;
10359 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10360 || (GET_CODE (reg) == SUBREG
10361 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10362 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10363 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10364 == REG)
10365 || (GET_CODE (reg) == SUBREG
10366 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10367 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10368 == CONST_INT)))
10370 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10371 ? operands[i] : SUBREG_REG (operands[i]));
10372 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10374 if (i == 0)
10376 base_reg = REGNO (reg);
10377 base_reg_rtx = reg;
10378 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10379 return 0;
10381 else if (base_reg != (int) REGNO (reg))
10382 /* Not addressed from the same base register. */
10383 return 0;
10385 /* If it isn't an integer register, then we can't do this. */
10386 if (unsorted_regs[i] < 0
10387 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10388 /* The effects are unpredictable if the base register is
10389 both updated and stored. */
10390 || (base_writeback && unsorted_regs[i] == base_reg)
10391 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10392 || unsorted_regs[i] > 14)
10393 return 0;
10395 unsorted_offsets[i] = INTVAL (offset);
10396 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10397 order[0] = i;
10399 else
10400 /* Not a suitable memory address. */
10401 return 0;
10404 /* All the useful information has now been extracted from the
10405 operands into unsorted_regs and unsorted_offsets; additionally,
10406 order[0] has been set to the lowest offset in the list. Sort
10407 the offsets into order, verifying that they are adjacent, and
10408 check that the register numbers are ascending. */
10409 if (!compute_offset_order (nops, unsorted_offsets, order,
10410 check_regs ? unsorted_regs : NULL))
10411 return 0;
10413 if (saved_order)
10414 memcpy (saved_order, order, sizeof order);
10416 if (base)
10418 *base = base_reg;
10420 for (i = 0; i < nops; i++)
10422 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10423 if (reg_rtxs)
10424 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10427 *load_offset = unsorted_offsets[order[0]];
10430 if (TARGET_THUMB1
10431 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10432 return 0;
10434 if (unsorted_offsets[order[0]] == 0)
10435 stm_case = 1; /* stmia */
10436 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10437 stm_case = 2; /* stmib */
10438 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10439 stm_case = 3; /* stmda */
10440 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10441 stm_case = 4; /* stmdb */
10442 else
10443 return 0;
10445 if (!multiple_operation_profitable_p (false, nops, 0))
10446 return 0;
10448 return stm_case;
10451 /* Routines for use in generating RTL. */
10453 /* Generate a load-multiple instruction. COUNT is the number of loads in
10454 the instruction; REGS and MEMS are arrays containing the operands.
10455 BASEREG is the base register to be used in addressing the memory operands.
10456 WBACK_OFFSET is nonzero if the instruction should update the base
10457 register. */
10459 static rtx
10460 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10461 HOST_WIDE_INT wback_offset)
10463 int i = 0, j;
10464 rtx result;
10466 if (!multiple_operation_profitable_p (false, count, 0))
10468 rtx seq;
10470 start_sequence ();
10472 for (i = 0; i < count; i++)
10473 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10475 if (wback_offset != 0)
10476 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10478 seq = get_insns ();
10479 end_sequence ();
10481 return seq;
10484 result = gen_rtx_PARALLEL (VOIDmode,
10485 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10486 if (wback_offset != 0)
10488 XVECEXP (result, 0, 0)
10489 = gen_rtx_SET (VOIDmode, basereg,
10490 plus_constant (Pmode, basereg, wback_offset));
10491 i = 1;
10492 count++;
10495 for (j = 0; i < count; i++, j++)
10496 XVECEXP (result, 0, i)
10497 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10499 return result;
10502 /* Generate a store-multiple instruction. COUNT is the number of stores in
10503 the instruction; REGS and MEMS are arrays containing the operands.
10504 BASEREG is the base register to be used in addressing the memory operands.
10505 WBACK_OFFSET is nonzero if the instruction should update the base
10506 register. */
10508 static rtx
10509 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10510 HOST_WIDE_INT wback_offset)
10512 int i = 0, j;
10513 rtx result;
10515 if (GET_CODE (basereg) == PLUS)
10516 basereg = XEXP (basereg, 0);
10518 if (!multiple_operation_profitable_p (false, count, 0))
10520 rtx seq;
10522 start_sequence ();
10524 for (i = 0; i < count; i++)
10525 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10527 if (wback_offset != 0)
10528 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10530 seq = get_insns ();
10531 end_sequence ();
10533 return seq;
10536 result = gen_rtx_PARALLEL (VOIDmode,
10537 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10538 if (wback_offset != 0)
10540 XVECEXP (result, 0, 0)
10541 = gen_rtx_SET (VOIDmode, basereg,
10542 plus_constant (Pmode, basereg, wback_offset));
10543 i = 1;
10544 count++;
10547 for (j = 0; i < count; i++, j++)
10548 XVECEXP (result, 0, i)
10549 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10551 return result;
10554 /* Generate either a load-multiple or a store-multiple instruction. This
10555 function can be used in situations where we can start with a single MEM
10556 rtx and adjust its address upwards.
10557 COUNT is the number of operations in the instruction, not counting a
10558 possible update of the base register. REGS is an array containing the
10559 register operands.
10560 BASEREG is the base register to be used in addressing the memory operands,
10561 which are constructed from BASEMEM.
10562 WRITE_BACK specifies whether the generated instruction should include an
10563 update of the base register.
10564 OFFSETP is used to pass an offset to and from this function; this offset
10565 is not used when constructing the address (instead BASEMEM should have an
10566 appropriate offset in its address), it is used only for setting
10567 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
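/* Illustrative note (assumed): a call with COUNT == 3 and WRITE_BACK set
   builds memory operands at BASEREG + 0, + 4 and + 8 (derived from BASEMEM)
   and requests a write-back offset of 12 from arm_gen_load_multiple_1 or
   arm_gen_store_multiple_1, so the emitted ldm/stm steps the base register
   past the block it transferred.  */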
10569 static rtx
10570 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10571 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10573 rtx mems[MAX_LDM_STM_OPS];
10574 HOST_WIDE_INT offset = *offsetp;
10575 int i;
10577 gcc_assert (count <= MAX_LDM_STM_OPS);
10579 if (GET_CODE (basereg) == PLUS)
10580 basereg = XEXP (basereg, 0);
10582 for (i = 0; i < count; i++)
10584 rtx addr = plus_constant (Pmode, basereg, i * 4);
10585 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10586 offset += 4;
10589 if (write_back)
10590 *offsetp = offset;
10592 if (is_load)
10593 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10594 write_back ? 4 * count : 0);
10595 else
10596 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10597 write_back ? 4 * count : 0);
10601 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10602 rtx basemem, HOST_WIDE_INT *offsetp)
10604 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10605 offsetp);
10609 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10610 rtx basemem, HOST_WIDE_INT *offsetp)
10612 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10613 offsetp);
10616 /* Called from a peephole2 expander to turn a sequence of loads into an
10617 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10618 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10619 is true if we can reorder the registers because they are used commutatively
10620 subsequently.
10621 Returns true iff we could generate a new instruction. */
10623 bool
10624 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10626 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10627 rtx mems[MAX_LDM_STM_OPS];
10628 int i, j, base_reg;
10629 rtx base_reg_rtx;
10630 HOST_WIDE_INT offset;
10631 int write_back = FALSE;
10632 int ldm_case;
10633 rtx addr;
10635 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10636 &base_reg, &offset, !sort_regs);
10638 if (ldm_case == 0)
10639 return false;
10641 if (sort_regs)
10642 for (i = 0; i < nops - 1; i++)
10643 for (j = i + 1; j < nops; j++)
10644 if (regs[i] > regs[j])
10646 int t = regs[i];
10647 regs[i] = regs[j];
10648 regs[j] = t;
10650 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10652 if (TARGET_THUMB1)
10654 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10655 gcc_assert (ldm_case == 1 || ldm_case == 5);
10656 write_back = TRUE;
10659 if (ldm_case == 5)
10661 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10662 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10663 offset = 0;
10664 if (!TARGET_THUMB1)
10666 base_reg = regs[0];
10667 base_reg_rtx = newbase;
10671 for (i = 0; i < nops; i++)
10673 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10674 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10675 SImode, addr, 0);
10677 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10678 write_back ? offset + i * 4 : 0));
10679 return true;
10682 /* Called from a peephole2 expander to turn a sequence of stores into an
10683 STM instruction. OPERANDS are the operands found by the peephole matcher;
10684 NOPS indicates how many separate stores we are trying to combine.
10685 Returns true iff we could generate a new instruction. */
10687 bool
10688 gen_stm_seq (rtx *operands, int nops)
10690 int i;
10691 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10692 rtx mems[MAX_LDM_STM_OPS];
10693 int base_reg;
10694 rtx base_reg_rtx;
10695 HOST_WIDE_INT offset;
10696 int write_back = FALSE;
10697 int stm_case;
10698 rtx addr;
10699 bool base_reg_dies;
10701 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10702 mem_order, &base_reg, &offset, true);
10704 if (stm_case == 0)
10705 return false;
10707 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10709 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10710 if (TARGET_THUMB1)
10712 gcc_assert (base_reg_dies);
10713 write_back = TRUE;
10716 if (stm_case == 5)
10718 gcc_assert (base_reg_dies);
10719 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10720 offset = 0;
10723 addr = plus_constant (Pmode, base_reg_rtx, offset);
10725 for (i = 0; i < nops; i++)
10727 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10728 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10729 SImode, addr, 0);
10731 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10732 write_back ? offset + i * 4 : 0));
10733 return true;
10736 /* Called from a peephole2 expander to turn a sequence of stores that are
10737 preceded by constant loads into an STM instruction. OPERANDS are the
10738 operands found by the peephole matcher; NOPS indicates how many
10739 separate stores we are trying to combine; there are 2 * NOPS
10740 instructions in the peephole.
10741 Returns true iff we could generate a new instruction. */
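/* Illustrative example (assumed): a matched sequence such as

     mov r4, #1
     mov r5, #2
     str r4, [r0]
     str r5, [r0, #4]

   is re-emitted as the two constant loads followed by a single
   "stmia r0, {r4, r5}" (with write-back on Thumb-1, where the base register
   must be dead after the sequence).  */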
10743 bool
10744 gen_const_stm_seq (rtx *operands, int nops)
10746 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10747 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10748 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10749 rtx mems[MAX_LDM_STM_OPS];
10750 int base_reg;
10751 rtx base_reg_rtx;
10752 HOST_WIDE_INT offset;
10753 int write_back = FALSE;
10754 int stm_case;
10755 rtx addr;
10756 bool base_reg_dies;
10757 int i, j;
10758 HARD_REG_SET allocated;
10760 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10761 mem_order, &base_reg, &offset, false);
10763 if (stm_case == 0)
10764 return false;
10766 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10768 /* If the same register is used more than once, try to find a free
10769 register. */
10770 CLEAR_HARD_REG_SET (allocated);
10771 for (i = 0; i < nops; i++)
10773 for (j = i + 1; j < nops; j++)
10774 if (regs[i] == regs[j])
10776 rtx t = peep2_find_free_register (0, nops * 2,
10777 TARGET_THUMB1 ? "l" : "r",
10778 SImode, &allocated);
10779 if (t == NULL_RTX)
10780 return false;
10781 reg_rtxs[i] = t;
10782 regs[i] = REGNO (t);
10786 /* Compute an ordering that maps the register numbers to an ascending
10787 sequence. */
10788 reg_order[0] = 0;
10789 for (i = 0; i < nops; i++)
10790 if (regs[i] < regs[reg_order[0]])
10791 reg_order[0] = i;
10793 for (i = 1; i < nops; i++)
10795 int this_order = reg_order[i - 1];
10796 for (j = 0; j < nops; j++)
10797 if (regs[j] > regs[reg_order[i - 1]]
10798 && (this_order == reg_order[i - 1]
10799 || regs[j] < regs[this_order]))
10800 this_order = j;
10801 reg_order[i] = this_order;
10804 /* Ensure that registers that must be live after the instruction end
10805 up with the correct value. */
10806 for (i = 0; i < nops; i++)
10808 int this_order = reg_order[i];
10809 if ((this_order != mem_order[i]
10810 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10811 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10812 return false;
10815 /* Load the constants. */
10816 for (i = 0; i < nops; i++)
10818 rtx op = operands[2 * nops + mem_order[i]];
10819 sorted_regs[i] = regs[reg_order[i]];
10820 emit_move_insn (reg_rtxs[reg_order[i]], op);
10823 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10825 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10826 if (TARGET_THUMB1)
10828 gcc_assert (base_reg_dies);
10829 write_back = TRUE;
10832 if (stm_case == 5)
10834 gcc_assert (base_reg_dies);
10835 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10836 offset = 0;
10839 addr = plus_constant (Pmode, base_reg_rtx, offset);
10841 for (i = 0; i < nops; i++)
10843 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10844 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10845 SImode, addr, 0);
10847 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10848 write_back ? offset + i * 4 : 0));
10849 return true;
10852 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10853 unaligned copies on processors which support unaligned semantics for those
10854 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10855 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10856 An interleave factor of 1 (the minimum) will perform no interleaving.
10857 Load/store multiple are used for aligned addresses where possible. */
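/* Illustrative example (assumed): copying 11 bytes with INTERLEAVE_FACTOR 2
   emits one 8-byte chunk (two loads followed by two stores, or ldm/stm for
   an aligned side); no whole word then remains, so the tail is finished
   with one halfword access and one byte access.  */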
10859 static void
10860 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10861 HOST_WIDE_INT length,
10862 unsigned int interleave_factor)
10864 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10865 int *regnos = XALLOCAVEC (int, interleave_factor);
10866 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10867 HOST_WIDE_INT i, j;
10868 HOST_WIDE_INT remaining = length, words;
10869 rtx halfword_tmp = NULL, byte_tmp = NULL;
10870 rtx dst, src;
10871 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10872 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10873 HOST_WIDE_INT srcoffset, dstoffset;
10874 HOST_WIDE_INT src_autoinc, dst_autoinc;
10875 rtx mem, addr;
10877 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10879 /* Use hard registers if we have aligned source or destination so we can use
10880 load/store multiple with contiguous registers. */
10881 if (dst_aligned || src_aligned)
10882 for (i = 0; i < interleave_factor; i++)
10883 regs[i] = gen_rtx_REG (SImode, i);
10884 else
10885 for (i = 0; i < interleave_factor; i++)
10886 regs[i] = gen_reg_rtx (SImode);
10888 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10889 src = copy_addr_to_reg (XEXP (srcbase, 0));
10891 srcoffset = dstoffset = 0;
10893 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10894 For copying the last bytes we want to subtract this offset again. */
10895 src_autoinc = dst_autoinc = 0;
10897 for (i = 0; i < interleave_factor; i++)
10898 regnos[i] = i;
10900 /* Copy BLOCK_SIZE_BYTES chunks. */
10902 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10904 /* Load words. */
10905 if (src_aligned && interleave_factor > 1)
10907 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10908 TRUE, srcbase, &srcoffset));
10909 src_autoinc += UNITS_PER_WORD * interleave_factor;
10911 else
10913 for (j = 0; j < interleave_factor; j++)
10915 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
10916 - src_autoinc));
10917 mem = adjust_automodify_address (srcbase, SImode, addr,
10918 srcoffset + j * UNITS_PER_WORD);
10919 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10921 srcoffset += block_size_bytes;
10924 /* Store words. */
10925 if (dst_aligned && interleave_factor > 1)
10927 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10928 TRUE, dstbase, &dstoffset));
10929 dst_autoinc += UNITS_PER_WORD * interleave_factor;
10931 else
10933 for (j = 0; j < interleave_factor; j++)
10935 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
10936 - dst_autoinc));
10937 mem = adjust_automodify_address (dstbase, SImode, addr,
10938 dstoffset + j * UNITS_PER_WORD);
10939 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10941 dstoffset += block_size_bytes;
10944 remaining -= block_size_bytes;
10947 /* Copy any whole words left (note these aren't interleaved with any
10948 subsequent halfword/byte load/stores in the interests of simplicity). */
10950 words = remaining / UNITS_PER_WORD;
10952 gcc_assert (words < interleave_factor);
10954 if (src_aligned && words > 1)
10956 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
10957 &srcoffset));
10958 src_autoinc += UNITS_PER_WORD * words;
10960 else
10962 for (j = 0; j < words; j++)
10964 addr = plus_constant (Pmode, src,
10965 srcoffset + j * UNITS_PER_WORD - src_autoinc);
10966 mem = adjust_automodify_address (srcbase, SImode, addr,
10967 srcoffset + j * UNITS_PER_WORD);
10968 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10970 srcoffset += words * UNITS_PER_WORD;
10973 if (dst_aligned && words > 1)
10975 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
10976 &dstoffset));
10977 dst_autoinc += words * UNITS_PER_WORD;
10979 else
10981 for (j = 0; j < words; j++)
10983 addr = plus_constant (Pmode, dst,
10984 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
10985 mem = adjust_automodify_address (dstbase, SImode, addr,
10986 dstoffset + j * UNITS_PER_WORD);
10987 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10989 dstoffset += words * UNITS_PER_WORD;
10992 remaining -= words * UNITS_PER_WORD;
10994 gcc_assert (remaining < 4);
10996 /* Copy a halfword if necessary. */
10998 if (remaining >= 2)
11000 halfword_tmp = gen_reg_rtx (SImode);
11002 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11003 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11004 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11006 /* Either write out immediately, or delay until we've loaded the last
11007 byte, depending on interleave factor. */
11008 if (interleave_factor == 1)
11010 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11011 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11012 emit_insn (gen_unaligned_storehi (mem,
11013 gen_lowpart (HImode, halfword_tmp)));
11014 halfword_tmp = NULL;
11015 dstoffset += 2;
11018 remaining -= 2;
11019 srcoffset += 2;
11022 gcc_assert (remaining < 2);
11024 /* Copy last byte. */
11026 if ((remaining & 1) != 0)
11028 byte_tmp = gen_reg_rtx (SImode);
11030 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11031 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11032 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11034 if (interleave_factor == 1)
11036 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11037 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11038 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11039 byte_tmp = NULL;
11040 dstoffset++;
11043 remaining--;
11044 srcoffset++;
11047 /* Store last halfword if we haven't done so already. */
11049 if (halfword_tmp)
11051 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11052 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11053 emit_insn (gen_unaligned_storehi (mem,
11054 gen_lowpart (HImode, halfword_tmp)));
11055 dstoffset += 2;
11058 /* Likewise for last byte. */
11060 if (byte_tmp)
11062 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11063 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11064 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11065 dstoffset++;
11068 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11071 /* From mips_adjust_block_mem:
11073 Helper function for doing a loop-based block operation on memory
11074 reference MEM. Each iteration of the loop will operate on LENGTH
11075 bytes of MEM.
11077 Create a new base register for use within the loop and point it to
11078 the start of MEM. Create a new memory reference that uses this
11079 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11081 static void
11082 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11083 rtx *loop_mem)
11085 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11087 /* Although the new mem does not refer to a known location,
11088 it does keep up to LENGTH bytes of alignment. */
11089 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11090 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11093 /* From mips_block_move_loop:
11095 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11096 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11097 the memory regions do not overlap. */
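/* Illustrative note (assumed): for LENGTH == 100 and BYTES_PER_ITER == 16,
   LEFTOVER is 4, the loop below copies 96 bytes in six iterations, and the
   remaining 4 bytes are handled by the final straight-line call.  */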
11099 static void
11100 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11101 unsigned int interleave_factor,
11102 HOST_WIDE_INT bytes_per_iter)
11104 rtx label, src_reg, dest_reg, final_src, test;
11105 HOST_WIDE_INT leftover;
11107 leftover = length % bytes_per_iter;
11108 length -= leftover;
11110 /* Create registers and memory references for use within the loop. */
11111 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11112 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11114 /* Calculate the value that SRC_REG should have after the last iteration of
11115 the loop. */
11116 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11117 0, 0, OPTAB_WIDEN);
11119 /* Emit the start of the loop. */
11120 label = gen_label_rtx ();
11121 emit_label (label);
11123 /* Emit the loop body. */
11124 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11125 interleave_factor);
11127 /* Move on to the next block. */
11128 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11129 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11131 /* Emit the loop condition. */
11132 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11133 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11135 /* Mop up any left-over bytes. */
11136 if (leftover)
11137 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11140 /* Emit a block move when either the source or destination is unaligned (not
11141 aligned to a four-byte boundary). This may need further tuning depending on
11142 core type, optimize_size setting, etc. */
11144 static int
11145 arm_movmemqi_unaligned (rtx *operands)
11147 HOST_WIDE_INT length = INTVAL (operands[2]);
11149 if (optimize_size)
11151 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11152 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11153 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11154 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11155 or dst_aligned though: allow more interleaving in those cases since the
11156 resulting code can be smaller. */
11157 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11158 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11160 if (length > 12)
11161 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11162 interleave_factor, bytes_per_iter);
11163 else
11164 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11165 interleave_factor);
11167 else
11169 /* Note that the loop created by arm_block_move_unaligned_loop may be
11170 subject to loop unrolling, which makes tuning this condition a little
11171 redundant. */
11172 if (length > 32)
11173 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11174 else
11175 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11178 return 1;
11182 arm_gen_movmemqi (rtx *operands)
11184 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11185 HOST_WIDE_INT srcoffset, dstoffset;
11186 int i;
11187 rtx src, dst, srcbase, dstbase;
11188 rtx part_bytes_reg = NULL;
11189 rtx mem;
11191 if (GET_CODE (operands[2]) != CONST_INT
11192 || GET_CODE (operands[3]) != CONST_INT
11193 || INTVAL (operands[2]) > 64)
11194 return 0;
11196 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11197 return arm_movmemqi_unaligned (operands);
11199 if (INTVAL (operands[3]) & 3)
11200 return 0;
11202 dstbase = operands[0];
11203 srcbase = operands[1];
11205 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11206 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11208 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11209 out_words_to_go = INTVAL (operands[2]) / 4;
11210 last_bytes = INTVAL (operands[2]) & 3;
11211 dstoffset = srcoffset = 0;
11213 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11214 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11216 for (i = 0; in_words_to_go >= 2; i+=4)
11218 if (in_words_to_go > 4)
11219 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11220 TRUE, srcbase, &srcoffset));
11221 else
11222 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11223 src, FALSE, srcbase,
11224 &srcoffset));
11226 if (out_words_to_go)
11228 if (out_words_to_go > 4)
11229 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11230 TRUE, dstbase, &dstoffset));
11231 else if (out_words_to_go != 1)
11232 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11233 out_words_to_go, dst,
11234 (last_bytes == 0
11235 ? FALSE : TRUE),
11236 dstbase, &dstoffset));
11237 else
11239 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11240 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11241 if (last_bytes != 0)
11243 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11244 dstoffset += 4;
11249 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11250 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11253 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11254 if (out_words_to_go)
11256 rtx sreg;
11258 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11259 sreg = copy_to_reg (mem);
11261 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11262 emit_move_insn (mem, sreg);
11263 in_words_to_go--;
11265 gcc_assert (!in_words_to_go); /* Sanity check */
11268 if (in_words_to_go)
11270 gcc_assert (in_words_to_go > 0);
11272 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11273 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11276 gcc_assert (!last_bytes || part_bytes_reg);
11278 if (BYTES_BIG_ENDIAN && last_bytes)
11280 rtx tmp = gen_reg_rtx (SImode);
11282 /* The bytes we want are in the top end of the word. */
11283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11284 GEN_INT (8 * (4 - last_bytes))));
11285 part_bytes_reg = tmp;
11287 while (last_bytes)
11289 mem = adjust_automodify_address (dstbase, QImode,
11290 plus_constant (Pmode, dst,
11291 last_bytes - 1),
11292 dstoffset + last_bytes - 1);
11293 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11295 if (--last_bytes)
11297 tmp = gen_reg_rtx (SImode);
11298 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11299 part_bytes_reg = tmp;
11304 else
11306 if (last_bytes > 1)
11308 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11309 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11310 last_bytes -= 2;
11311 if (last_bytes)
11313 rtx tmp = gen_reg_rtx (SImode);
11314 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11315 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11316 part_bytes_reg = tmp;
11317 dstoffset += 2;
11321 if (last_bytes)
11323 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11324 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11328 return 1;
11331 /* Select a dominance comparison mode if possible for a test of the general
11332 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11333 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11334 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11335 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11336 In all cases OP will be either EQ or NE, but we don't need to know which
11337 here. If we are unable to support a dominance comparison we return
11338 CC mode. This will then fail to match for the RTL expressions that
11339 generate this call. */
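/* Illustrative example (assumed): for a test such as "if (a == 0 && b == 0)"
   combine presents
   (and (eq (reg a) (const_int 0)) (eq (reg b) (const_int 0)))
   with COND_OR == DOM_CC_X_AND_Y; both sub-comparisons are EQ, so
   CC_DEQmode is returned and the test can be emitted as

     cmp   a, #0
     cmpeq b, #0

   with a single conditional branch on the combined flags.  */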
11340 enum machine_mode
11341 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11343 enum rtx_code cond1, cond2;
11344 int swapped = 0;
11346 /* Currently we will probably get the wrong result if the individual
11347 comparisons are not simple. This also ensures that it is safe to
11348 reverse a comparison if necessary. */
11349 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11350 != CCmode)
11351 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11352 != CCmode))
11353 return CCmode;
11355 /* The if_then_else variant of this tests the second condition if the
11356 first passes, but is true if the first fails. Reverse the first
11357 condition to get a true "inclusive-or" expression. */
11358 if (cond_or == DOM_CC_NX_OR_Y)
11359 cond1 = reverse_condition (cond1);
11361 /* If the comparisons are not equal, and one doesn't dominate the other,
11362 then we can't do this. */
11363 if (cond1 != cond2
11364 && !comparison_dominates_p (cond1, cond2)
11365 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11366 return CCmode;
11368 if (swapped)
11370 enum rtx_code temp = cond1;
11371 cond1 = cond2;
11372 cond2 = temp;
11375 switch (cond1)
11377 case EQ:
11378 if (cond_or == DOM_CC_X_AND_Y)
11379 return CC_DEQmode;
11381 switch (cond2)
11383 case EQ: return CC_DEQmode;
11384 case LE: return CC_DLEmode;
11385 case LEU: return CC_DLEUmode;
11386 case GE: return CC_DGEmode;
11387 case GEU: return CC_DGEUmode;
11388 default: gcc_unreachable ();
11391 case LT:
11392 if (cond_or == DOM_CC_X_AND_Y)
11393 return CC_DLTmode;
11395 switch (cond2)
11397 case LT:
11398 return CC_DLTmode;
11399 case LE:
11400 return CC_DLEmode;
11401 case NE:
11402 return CC_DNEmode;
11403 default:
11404 gcc_unreachable ();
11407 case GT:
11408 if (cond_or == DOM_CC_X_AND_Y)
11409 return CC_DGTmode;
11411 switch (cond2)
11413 case GT:
11414 return CC_DGTmode;
11415 case GE:
11416 return CC_DGEmode;
11417 case NE:
11418 return CC_DNEmode;
11419 default:
11420 gcc_unreachable ();
11423 case LTU:
11424 if (cond_or == DOM_CC_X_AND_Y)
11425 return CC_DLTUmode;
11427 switch (cond2)
11429 case LTU:
11430 return CC_DLTUmode;
11431 case LEU:
11432 return CC_DLEUmode;
11433 case NE:
11434 return CC_DNEmode;
11435 default:
11436 gcc_unreachable ();
11439 case GTU:
11440 if (cond_or == DOM_CC_X_AND_Y)
11441 return CC_DGTUmode;
11443 switch (cond2)
11445 case GTU:
11446 return CC_DGTUmode;
11447 case GEU:
11448 return CC_DGEUmode;
11449 case NE:
11450 return CC_DNEmode;
11451 default:
11452 gcc_unreachable ();
11455 /* The remaining cases only occur when both comparisons are the
11456 same. */
11457 case NE:
11458 gcc_assert (cond1 == cond2);
11459 return CC_DNEmode;
11461 case LE:
11462 gcc_assert (cond1 == cond2);
11463 return CC_DLEmode;
11465 case GE:
11466 gcc_assert (cond1 == cond2);
11467 return CC_DGEmode;
11469 case LEU:
11470 gcc_assert (cond1 == cond2);
11471 return CC_DLEUmode;
11473 case GEU:
11474 gcc_assert (cond1 == cond2);
11475 return CC_DGEUmode;
11477 default:
11478 gcc_unreachable ();
11482 enum machine_mode
11483 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11485 /* All floating point compares return CCFP if it is an equality
11486 comparison, and CCFPE otherwise. */
11487 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11489 switch (op)
11491 case EQ:
11492 case NE:
11493 case UNORDERED:
11494 case ORDERED:
11495 case UNLT:
11496 case UNLE:
11497 case UNGT:
11498 case UNGE:
11499 case UNEQ:
11500 case LTGT:
11501 return CCFPmode;
11503 case LT:
11504 case LE:
11505 case GT:
11506 case GE:
11507 return CCFPEmode;
11509 default:
11510 gcc_unreachable ();
11514 /* A compare with a shifted operand. Because of canonicalization, the
11515 comparison will have to be swapped when we emit the assembler. */
11516 if (GET_MODE (y) == SImode
11517 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11518 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11519 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11520 || GET_CODE (x) == ROTATERT))
11521 return CC_SWPmode;
11523 /* This operation is performed swapped, but since we only rely on the Z
11524 flag we don't need an additional mode. */
11525 if (GET_MODE (y) == SImode
11526 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11527 && GET_CODE (x) == NEG
11528 && (op == EQ || op == NE))
11529 return CC_Zmode;
11531 /* This is a special case that is used by combine to allow a
11532 comparison of a shifted byte load to be split into a zero-extend
11533 followed by a comparison of the shifted integer (only valid for
11534 equalities and unsigned inequalities). */
11535 if (GET_MODE (x) == SImode
11536 && GET_CODE (x) == ASHIFT
11537 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11538 && GET_CODE (XEXP (x, 0)) == SUBREG
11539 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11540 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11541 && (op == EQ || op == NE
11542 || op == GEU || op == GTU || op == LTU || op == LEU)
11543 && GET_CODE (y) == CONST_INT)
11544 return CC_Zmode;
11546 /* A construct for a conditional compare: if the false arm contains
11547 0, then both conditions must be true; otherwise either condition
11548 must be true. Not all conditions are possible, so CCmode is
11549 returned if it can't be done. */
11550 if (GET_CODE (x) == IF_THEN_ELSE
11551 && (XEXP (x, 2) == const0_rtx
11552 || XEXP (x, 2) == const1_rtx)
11553 && COMPARISON_P (XEXP (x, 0))
11554 && COMPARISON_P (XEXP (x, 1)))
11555 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11556 INTVAL (XEXP (x, 2)));
11558 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11559 if (GET_CODE (x) == AND
11560 && (op == EQ || op == NE)
11561 && COMPARISON_P (XEXP (x, 0))
11562 && COMPARISON_P (XEXP (x, 1)))
11563 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11564 DOM_CC_X_AND_Y);
11566 if (GET_CODE (x) == IOR
11567 && (op == EQ || op == NE)
11568 && COMPARISON_P (XEXP (x, 0))
11569 && COMPARISON_P (XEXP (x, 1)))
11570 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11571 DOM_CC_X_OR_Y);
11573 /* An operation (on Thumb) where we want to test for a single bit.
11574 This is done by shifting that bit up into the top bit of a
11575 scratch register; we can then branch on the sign bit. */
11576 if (TARGET_THUMB1
11577 && GET_MODE (x) == SImode
11578 && (op == EQ || op == NE)
11579 && GET_CODE (x) == ZERO_EXTRACT
11580 && XEXP (x, 1) == const1_rtx)
11581 return CC_Nmode;
11583 /* An operation that sets the condition codes as a side-effect; the
11584 V flag is not set correctly, so we can only use comparisons where
11585 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11586 instead.) */
11587 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11588 if (GET_MODE (x) == SImode
11589 && y == const0_rtx
11590 && (op == EQ || op == NE || op == LT || op == GE)
11591 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11592 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11593 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11594 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11595 || GET_CODE (x) == LSHIFTRT
11596 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11597 || GET_CODE (x) == ROTATERT
11598 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11599 return CC_NOOVmode;
11601 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11602 return CC_Zmode;
11604 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11605 && GET_CODE (x) == PLUS
11606 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11607 return CC_Cmode;
11609 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11611 switch (op)
11613 case EQ:
11614 case NE:
11615 /* A DImode comparison against zero can be implemented by
11616 or'ing the two halves together. */
11617 if (y == const0_rtx)
11618 return CC_Zmode;
11620 /* We can do an equality test in three Thumb instructions. */
11621 if (!TARGET_32BIT)
11622 return CC_Zmode;
11624 /* FALLTHROUGH */
11626 case LTU:
11627 case LEU:
11628 case GTU:
11629 case GEU:
11630 /* DImode unsigned comparisons can be implemented by cmp +
11631 cmpeq without a scratch register. Not worth doing in
11632 Thumb-2. */
11633 if (TARGET_32BIT)
11634 return CC_CZmode;
11636 /* FALLTHROUGH */
11638 case LT:
11639 case LE:
11640 case GT:
11641 case GE:
11642 /* DImode signed and unsigned comparisons can be implemented
11643 by cmp + sbcs with a scratch register, but that does not
11644 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11645 gcc_assert (op != EQ && op != NE);
11646 return CC_NCVmode;
11648 default:
11649 gcc_unreachable ();
11653 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11654 return GET_MODE (x);
11656 return CCmode;
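/* Two common source patterns the selection above is meant to catch
   (illustrative sketches, assuming straightforward combine output):

     if ((a << 2) == b) ...   the shifted operand becomes operand 0, so
                              CC_SWPmode records that the compare will be
                              emitted with its operands swapped;

     if (a + b < a) ...       the unsigned overflow idiom: a PLUS compared
                              against one of its own operands selects
                              CC_Cmode, so only the carry flag is used.  */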
11659 /* X and Y are two things to compare using CODE. Emit the compare insn and
11660 return the rtx for register 0 in the proper mode. FP means this is a
11661 floating point compare: I don't think that it is needed on the arm. */
11662 rtx
11663 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11665 enum machine_mode mode;
11666 rtx cc_reg;
11667 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11669 /* We might have X as a constant, Y as a register because of the predicates
11670 used for cmpdi. If so, force X to a register here. */
11671 if (dimode_comparison && !REG_P (x))
11672 x = force_reg (DImode, x);
11674 mode = SELECT_CC_MODE (code, x, y);
11675 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11677 if (dimode_comparison
11678 && mode != CC_CZmode)
11680 rtx clobber, set;
11682 /* To compare two non-zero values for equality, XOR them and
11683 then compare against zero. Not used for ARM mode; there
11684 CC_CZmode is cheaper. */
11685 if (mode == CC_Zmode && y != const0_rtx)
11687 gcc_assert (!reload_completed);
11688 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11689 y = const0_rtx;
11692 /* A scratch register is required. */
11693 if (reload_completed)
11694 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11695 else
11696 scratch = gen_rtx_SCRATCH (SImode);
11698 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11699 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11700 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11702 else
11703 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11705 return cc_reg;
11708 /* Generate a sequence of insns that will generate the correct return
11709 address mask depending on the physical architecture that the program
11710 is running on. */
11711 rtx
11712 arm_gen_return_addr_mask (void)
11714 rtx reg = gen_reg_rtx (Pmode);
11716 emit_insn (gen_return_addr_mask (reg));
11717 return reg;
11720 void
11721 arm_reload_in_hi (rtx *operands)
11723 rtx ref = operands[1];
11724 rtx base, scratch;
11725 HOST_WIDE_INT offset = 0;
11727 if (GET_CODE (ref) == SUBREG)
11729 offset = SUBREG_BYTE (ref);
11730 ref = SUBREG_REG (ref);
11733 if (GET_CODE (ref) == REG)
11735 /* We have a pseudo which has been spilt onto the stack; there
11736 are two cases here: the first where there is a simple
11737 stack-slot replacement and a second where the stack-slot is
11738 out of range, or is used as a subreg. */
11739 if (reg_equiv_mem (REGNO (ref)))
11741 ref = reg_equiv_mem (REGNO (ref));
11742 base = find_replacement (&XEXP (ref, 0));
11744 else
11745 /* The slot is out of range, or was dressed up in a SUBREG. */
11746 base = reg_equiv_address (REGNO (ref));
11748 else
11749 base = find_replacement (&XEXP (ref, 0));
11751 /* Handle the case where the address is too complex to be offset by 1. */
11752 if (GET_CODE (base) == MINUS
11753 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11755 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11757 emit_set_insn (base_plus, base);
11758 base = base_plus;
11760 else if (GET_CODE (base) == PLUS)
11762 /* The addend must be CONST_INT, or we would have dealt with it above. */
11763 HOST_WIDE_INT hi, lo;
11765 offset += INTVAL (XEXP (base, 1));
11766 base = XEXP (base, 0);
11768 /* Rework the address into a legal sequence of insns. */
11769 /* Valid range for lo is -4095 -> 4095 */
11770 lo = (offset >= 0
11771 ? (offset & 0xfff)
11772 : -((-offset) & 0xfff));
11774 /* Corner case, if lo is the max offset then we would be out of range
11775 once we have added the additional 1 below, so bump the msb into the
11776 pre-loading insn(s). */
11777 if (lo == 4095)
11778 lo &= 0x7ff;
11780 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11781 ^ (HOST_WIDE_INT) 0x80000000)
11782 - (HOST_WIDE_INT) 0x80000000);
11784 gcc_assert (hi + lo == offset);
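/* Worked example of the split above (illustrative): for offset = 0x1234,
   lo = 0x1234 & 0xfff = 0x234 and hi = 0x1000, so hi + lo == offset.
   For offset = -0x1234, lo = -0x234 and offset - lo = -0x1000; the
   XOR/subtract with 0x80000000 simply sign-extends that value from 32
   bits when HOST_WIDE_INT is wider, so the assertion still holds.  */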
11786 if (hi != 0)
11788 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11790 /* Get the base address; addsi3 knows how to handle constants
11791 that require more than one insn. */
11792 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11793 base = base_plus;
11794 offset = lo;
11798 /* Operands[2] may overlap operands[0] (though it won't overlap
11799 operands[1]), that's why we asked for a DImode reg -- so we can
11800 use the bit that does not overlap. */
11801 if (REGNO (operands[2]) == REGNO (operands[0]))
11802 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11803 else
11804 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11806 emit_insn (gen_zero_extendqisi2 (scratch,
11807 gen_rtx_MEM (QImode,
11808 plus_constant (Pmode, base,
11809 offset))));
11810 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11811 gen_rtx_MEM (QImode,
11812 plus_constant (Pmode, base,
11813 offset + 1))));
11814 if (!BYTES_BIG_ENDIAN)
11815 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11816 gen_rtx_IOR (SImode,
11817 gen_rtx_ASHIFT
11818 (SImode,
11819 gen_rtx_SUBREG (SImode, operands[0], 0),
11820 GEN_INT (8)),
11821 scratch));
11822 else
11823 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11824 gen_rtx_IOR (SImode,
11825 gen_rtx_ASHIFT (SImode, scratch,
11826 GEN_INT (8)),
11827 gen_rtx_SUBREG (SImode, operands[0], 0)));
11830 /* Handle storing a half-word to memory during reload by synthesizing it as
11831 byte stores. Take care not to clobber the input values until after we
11832 have moved them somewhere safe. This code assumes that if the DImode
11833 scratch in operands[2] overlaps either the input value or output address
11834 in some way, then that value must die in this insn (we absolutely need
11835 two scratch registers for some corner cases). */
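/* On a little-endian target the synthesized sequence looks roughly like
   this (illustrative, register names arbitrary):

     strb  rOUT, [rBASE, #offset]        @ low byte
     lsr   rSCRATCH, rOUT, #8
     strb  rSCRATCH, [rBASE, #offset+1]  @ high byte

   with the two bytes landing at swapped addresses on a big-endian
   target.  */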
11836 void
11837 arm_reload_out_hi (rtx *operands)
11839 rtx ref = operands[0];
11840 rtx outval = operands[1];
11841 rtx base, scratch;
11842 HOST_WIDE_INT offset = 0;
11844 if (GET_CODE (ref) == SUBREG)
11846 offset = SUBREG_BYTE (ref);
11847 ref = SUBREG_REG (ref);
11850 if (GET_CODE (ref) == REG)
11852 /* We have a pseudo which has been spilt onto the stack; there
11853 are two cases here: the first where there is a simple
11854 stack-slot replacement and a second where the stack-slot is
11855 out of range, or is used as a subreg. */
11856 if (reg_equiv_mem (REGNO (ref)))
11858 ref = reg_equiv_mem (REGNO (ref));
11859 base = find_replacement (&XEXP (ref, 0));
11861 else
11862 /* The slot is out of range, or was dressed up in a SUBREG. */
11863 base = reg_equiv_address (REGNO (ref));
11865 else
11866 base = find_replacement (&XEXP (ref, 0));
11868 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11870 /* Handle the case where the address is too complex to be offset by 1. */
11871 if (GET_CODE (base) == MINUS
11872 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11874 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11876 /* Be careful not to destroy OUTVAL. */
11877 if (reg_overlap_mentioned_p (base_plus, outval))
11879 /* Updating base_plus might destroy outval, see if we can
11880 swap the scratch and base_plus. */
11881 if (!reg_overlap_mentioned_p (scratch, outval))
11883 rtx tmp = scratch;
11884 scratch = base_plus;
11885 base_plus = tmp;
11887 else
11889 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11891 /* Be conservative and copy OUTVAL into the scratch now,
11892 this should only be necessary if outval is a subreg
11893 of something larger than a word. */
11894 /* XXX Might this clobber base? I can't see how it can,
11895 since scratch is known to overlap with OUTVAL, and
11896 must be wider than a word. */
11897 emit_insn (gen_movhi (scratch_hi, outval));
11898 outval = scratch_hi;
11902 emit_set_insn (base_plus, base);
11903 base = base_plus;
11905 else if (GET_CODE (base) == PLUS)
11907 /* The addend must be CONST_INT, or we would have dealt with it above. */
11908 HOST_WIDE_INT hi, lo;
11910 offset += INTVAL (XEXP (base, 1));
11911 base = XEXP (base, 0);
11913 /* Rework the address into a legal sequence of insns. */
11914 /* Valid range for lo is -4095 -> 4095 */
11915 lo = (offset >= 0
11916 ? (offset & 0xfff)
11917 : -((-offset) & 0xfff));
11919 /* Corner case, if lo is the max offset then we would be out of range
11920 once we have added the additional 1 below, so bump the msb into the
11921 pre-loading insn(s). */
11922 if (lo == 4095)
11923 lo &= 0x7ff;
11925 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11926 ^ (HOST_WIDE_INT) 0x80000000)
11927 - (HOST_WIDE_INT) 0x80000000);
11929 gcc_assert (hi + lo == offset);
11931 if (hi != 0)
11933 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11935 /* Be careful not to destroy OUTVAL. */
11936 if (reg_overlap_mentioned_p (base_plus, outval))
11938 /* Updating base_plus might destroy outval, see if we
11939 can swap the scratch and base_plus. */
11940 if (!reg_overlap_mentioned_p (scratch, outval))
11942 rtx tmp = scratch;
11943 scratch = base_plus;
11944 base_plus = tmp;
11946 else
11948 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11950 /* Be conservative and copy outval into scratch now,
11951 this should only be necessary if outval is a
11952 subreg of something larger than a word. */
11953 /* XXX Might this clobber base? I can't see how it
11954 can, since scratch is known to overlap with
11955 outval. */
11956 emit_insn (gen_movhi (scratch_hi, outval));
11957 outval = scratch_hi;
11961 /* Get the base address; addsi3 knows how to handle constants
11962 that require more than one insn. */
11963 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11964 base = base_plus;
11965 offset = lo;
11969 if (BYTES_BIG_ENDIAN)
11971 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11972 plus_constant (Pmode, base,
11973 offset + 1)),
11974 gen_lowpart (QImode, outval)));
11975 emit_insn (gen_lshrsi3 (scratch,
11976 gen_rtx_SUBREG (SImode, outval, 0),
11977 GEN_INT (8)));
11978 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
11979 offset)),
11980 gen_lowpart (QImode, scratch)));
11982 else
11984 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
11985 offset)),
11986 gen_lowpart (QImode, outval)));
11987 emit_insn (gen_lshrsi3 (scratch,
11988 gen_rtx_SUBREG (SImode, outval, 0),
11989 GEN_INT (8)));
11990 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11991 plus_constant (Pmode, base,
11992 offset + 1)),
11993 gen_lowpart (QImode, scratch)));
11997 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11998 (padded to the size of a word) should be passed in a register. */
12000 static bool
12001 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12003 if (TARGET_AAPCS_BASED)
12004 return must_pass_in_stack_var_size (mode, type);
12005 else
12006 return must_pass_in_stack_var_size_or_pad (mode, type);
12010 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12011 Return true if an argument passed on the stack should be padded upwards,
12012 i.e. if the least-significant byte has useful data.
12013 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12014 aggregate types are placed in the lowest memory address. */
12016 bool
12017 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12019 if (!TARGET_AAPCS_BASED)
12020 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12022 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12023 return false;
12025 return true;
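/* Concretely (AAPCS, big-endian, illustrative): a plain 'short' argument
   that ends up on the stack is padded downward, i.e. it occupies the
   high-addressed (least significant) bytes of its word slot, whereas a
   two-byte struct is padded upward and sits in the low-addressed bytes,
   matching its in-memory layout.  */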
12029 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12030 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12031 register has useful data, and return the opposite if the most
12032 significant byte does. */
12034 bool
12035 arm_pad_reg_upward (enum machine_mode mode,
12036 tree type, int first ATTRIBUTE_UNUSED)
12038 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12040 /* For AAPCS, small aggregates, small fixed-point types,
12041 and small complex types are always padded upwards. */
12042 if (type)
12044 if ((AGGREGATE_TYPE_P (type)
12045 || TREE_CODE (type) == COMPLEX_TYPE
12046 || FIXED_POINT_TYPE_P (type))
12047 && int_size_in_bytes (type) <= 4)
12048 return true;
12050 else
12052 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12053 && GET_MODE_SIZE (mode) <= 4)
12054 return true;
12058 /* Otherwise, use default padding. */
12059 return !BYTES_BIG_ENDIAN;
12063 /* Print a symbolic form of X to the debug file, F. */
12064 static void
12065 arm_print_value (FILE *f, rtx x)
12067 switch (GET_CODE (x))
12069 case CONST_INT:
12070 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12071 return;
12073 case CONST_DOUBLE:
12074 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12075 return;
12077 case CONST_VECTOR:
12079 int i;
12081 fprintf (f, "<");
12082 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12084 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12085 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12086 fputc (',', f);
12088 fprintf (f, ">");
12090 return;
12092 case CONST_STRING:
12093 fprintf (f, "\"%s\"", XSTR (x, 0));
12094 return;
12096 case SYMBOL_REF:
12097 fprintf (f, "`%s'", XSTR (x, 0));
12098 return;
12100 case LABEL_REF:
12101 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12102 return;
12104 case CONST:
12105 arm_print_value (f, XEXP (x, 0));
12106 return;
12108 case PLUS:
12109 arm_print_value (f, XEXP (x, 0));
12110 fprintf (f, "+");
12111 arm_print_value (f, XEXP (x, 1));
12112 return;
12114 case PC:
12115 fprintf (f, "pc");
12116 return;
12118 default:
12119 fprintf (f, "????");
12120 return;
12124 /* Routines for manipulation of the constant pool. */
12126 /* Arm instructions cannot load a large constant directly into a
12127 register; they have to come from a pc relative load. The constant
12128 must therefore be placed in the addressable range of the pc
12129 relative load. Depending on the precise pc relative load
12130 instruction the range is somewhere between 256 bytes and 4k. This
12131 means that we often have to dump a constant inside a function, and
12132 generate code to branch around it.
12134 It is important to minimize this, since the branches will slow
12135 things down and make the code larger.
12137 Normally we can hide the table after an existing unconditional
12138 branch so that there is no interruption of the flow, but in the
12139 worst case the code looks like this:
12141 ldr rn, L1
12143 b L2
12144 align
12145 L1: .long value
12149 ldr rn, L3
12151 b L4
12152 align
12153 L3: .long value
12157 We fix this by performing a scan after scheduling, which notices
12158 which instructions need to have their operands fetched from the
12159 constant table and builds the table.
12161 The algorithm starts by building a table of all the constants that
12162 need fixing up and all the natural barriers in the function (places
12163 where a constant table can be dropped without breaking the flow).
12164 For each fixup we note how far the pc-relative replacement will be
12165 able to reach and the offset of the instruction into the function.
12167 Having built the table we then group the fixes together to form
12168 tables that are as large as possible (subject to addressing
12169 constraints) and emit each table of constants after the last
12170 barrier that is within range of all the instructions in the group.
12171 If a group does not contain a barrier, then we forcibly create one
12172 by inserting a jump instruction into the flow. Once the table has
12173 been inserted, the insns are then modified to reference the
12174 relevant entry in the pool.
12176 Possible enhancements to the algorithm (not implemented) are:
12178 1) For some processors and object formats, there may be benefit in
12179 aligning the pools to the start of cache lines; this alignment
12180 would need to be taken into account when calculating addressability
12181 of a pool. */
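/* A minimal C-level illustration (hedged; exact label names, return
   sequence and register allocation will vary):

     unsigned int get_magic (void) { return 0xdeadbeef; }

   compiled for a core without movw/movt typically becomes

     ldr   r0, .L2
     bx    lr
     .L2:  .word  0xdeadbeef

   Here the pool naturally follows the function; the machinery below only
   has to work hard when the constant's pc-relative range would otherwise
   be exceeded inside a large function.  */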
12183 /* These typedefs are located at the start of this file, so that
12184 they can be used in the prototypes there. This comment is to
12185 remind readers of that fact so that the following structures
12186 can be understood more easily.
12188 typedef struct minipool_node Mnode;
12189 typedef struct minipool_fixup Mfix; */
12191 struct minipool_node
12193 /* Doubly linked chain of entries. */
12194 Mnode * next;
12195 Mnode * prev;
12196 /* The maximum offset into the code at which this entry can be placed. While
12197 pushing fixes for forward references, all entries are sorted in order
12198 of increasing max_address. */
12199 HOST_WIDE_INT max_address;
12200 /* Similarly for an entry inserted for a backwards ref. */
12201 HOST_WIDE_INT min_address;
12202 /* The number of fixes referencing this entry. This can become zero
12203 if we "unpush" an entry. In this case we ignore the entry when we
12204 come to emit the code. */
12205 int refcount;
12206 /* The offset from the start of the minipool. */
12207 HOST_WIDE_INT offset;
12209 /* The value in the table. */
12209 rtx value;
12210 /* The mode of value. */
12211 enum machine_mode mode;
12212 /* The size of the value. With iWMMXt enabled,
12213 sizes > 4 also imply an alignment of 8 bytes. */
12214 int fix_size;
12217 struct minipool_fixup
12219 Mfix * next;
12220 rtx insn;
12221 HOST_WIDE_INT address;
12222 rtx * loc;
12223 enum machine_mode mode;
12224 int fix_size;
12225 rtx value;
12226 Mnode * minipool;
12227 HOST_WIDE_INT forwards;
12228 HOST_WIDE_INT backwards;
12231 /* Fixes less than a word need padding out to a word boundary. */
12232 #define MINIPOOL_FIX_SIZE(mode) \
12233 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
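/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (sub-word entries are padded out to a word), while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */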
12235 static Mnode * minipool_vector_head;
12236 static Mnode * minipool_vector_tail;
12237 static rtx minipool_vector_label;
12238 static int minipool_pad;
12240 /* The linked list of all minipool fixes required for this function. */
12241 Mfix * minipool_fix_head;
12242 Mfix * minipool_fix_tail;
12243 /* The fix entry for the current minipool, once it has been placed. */
12244 Mfix * minipool_barrier;
12246 /* Determines if INSN is the start of a jump table. Returns the end
12247 of the TABLE or NULL_RTX. */
12248 static rtx
12249 is_jump_table (rtx insn)
12251 rtx table;
12253 if (jump_to_label_p (insn)
12254 && ((table = next_real_insn (JUMP_LABEL (insn)))
12255 == next_real_insn (insn))
12256 && table != NULL
12257 && GET_CODE (table) == JUMP_INSN
12258 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12259 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12260 return table;
12262 return NULL_RTX;
12265 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12266 #define JUMP_TABLES_IN_TEXT_SECTION 0
12267 #endif
12269 static HOST_WIDE_INT
12270 get_jump_table_size (rtx insn)
12272 /* ADDR_VECs only take room if read-only data goes into the text
12273 section. */
12274 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12276 rtx body = PATTERN (insn);
12277 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12278 HOST_WIDE_INT size;
12279 HOST_WIDE_INT modesize;
12281 modesize = GET_MODE_SIZE (GET_MODE (body));
12282 size = modesize * XVECLEN (body, elt);
12283 switch (modesize)
12285 case 1:
12286 /* Round up size of TBB table to a halfword boundary. */
12287 size = (size + 1) & ~(HOST_WIDE_INT)1;
12288 break;
12289 case 2:
12290 /* No padding necessary for TBH. */
12291 break;
12292 case 4:
12293 /* Add two bytes for alignment on Thumb. */
12294 if (TARGET_THUMB)
12295 size += 2;
12296 break;
12297 default:
12298 gcc_unreachable ();
12300 return size;
12303 return 0;
12306 /* Return the maximum amount of padding that will be inserted before
12307 label LABEL. */
12309 static HOST_WIDE_INT
12310 get_label_padding (rtx label)
12312 HOST_WIDE_INT align, min_insn_size;
12314 align = 1 << label_to_alignment (label);
12315 min_insn_size = TARGET_THUMB ? 2 : 4;
12316 return align > min_insn_size ? align - min_insn_size : 0;
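/* For example, a label aligned to 8 bytes can be preceded by at most
   8 - 2 = 6 bytes of padding on Thumb, or 8 - 4 = 4 bytes on ARM;
   a word-aligned label (align == 4) needs no worst-case allowance on
   ARM, since every ARM insn is already 4 bytes.  */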
12319 /* Move a minipool fix MP from its current location to before MAX_MP.
12320 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12321 constraints may need updating. */
12322 static Mnode *
12323 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12324 HOST_WIDE_INT max_address)
12326 /* The code below assumes these are different. */
12327 gcc_assert (mp != max_mp);
12329 if (max_mp == NULL)
12331 if (max_address < mp->max_address)
12332 mp->max_address = max_address;
12334 else
12336 if (max_address > max_mp->max_address - mp->fix_size)
12337 mp->max_address = max_mp->max_address - mp->fix_size;
12338 else
12339 mp->max_address = max_address;
12341 /* Unlink MP from its current position. Since max_mp is non-null,
12342 mp->prev must be non-null. */
12343 mp->prev->next = mp->next;
12344 if (mp->next != NULL)
12345 mp->next->prev = mp->prev;
12346 else
12347 minipool_vector_tail = mp->prev;
12349 /* Re-insert it before MAX_MP. */
12350 mp->next = max_mp;
12351 mp->prev = max_mp->prev;
12352 max_mp->prev = mp;
12354 if (mp->prev != NULL)
12355 mp->prev->next = mp;
12356 else
12357 minipool_vector_head = mp;
12360 /* Save the new entry. */
12361 max_mp = mp;
12363 /* Scan over the preceding entries and adjust their addresses as
12364 required. */
12365 while (mp->prev != NULL
12366 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12368 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12369 mp = mp->prev;
12372 return max_mp;
12375 /* Add a constant to the minipool for a forward reference. Returns the
12376 node added or NULL if the constant will not fit in this pool. */
12377 static Mnode *
12378 add_minipool_forward_ref (Mfix *fix)
12380 /* If set, max_mp is the first pool_entry that has a lower
12381 constraint than the one we are trying to add. */
12382 Mnode * max_mp = NULL;
12383 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12384 Mnode * mp;
12386 /* If the minipool starts before the end of FIX->INSN then this FIX
12387 can not be placed into the current pool. Furthermore, adding the
12388 new constant pool entry may cause the pool to start FIX_SIZE bytes
12389 earlier. */
12390 if (minipool_vector_head &&
12391 (fix->address + get_attr_length (fix->insn)
12392 >= minipool_vector_head->max_address - fix->fix_size))
12393 return NULL;
12395 /* Scan the pool to see if a constant with the same value has
12396 already been added. While we are doing this, also note the
12397 location where we must insert the constant if it doesn't already
12398 exist. */
12399 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12401 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12402 && fix->mode == mp->mode
12403 && (GET_CODE (fix->value) != CODE_LABEL
12404 || (CODE_LABEL_NUMBER (fix->value)
12405 == CODE_LABEL_NUMBER (mp->value)))
12406 && rtx_equal_p (fix->value, mp->value))
12408 /* More than one fix references this entry. */
12409 mp->refcount++;
12410 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12413 /* Note the insertion point if necessary. */
12414 if (max_mp == NULL
12415 && mp->max_address > max_address)
12416 max_mp = mp;
12418 /* If we are inserting an 8-byte aligned quantity and
12419 we have not already found an insertion point, then
12420 make sure that all such 8-byte aligned quantities are
12421 placed at the start of the pool. */
12422 if (ARM_DOUBLEWORD_ALIGN
12423 && max_mp == NULL
12424 && fix->fix_size >= 8
12425 && mp->fix_size < 8)
12427 max_mp = mp;
12428 max_address = mp->max_address;
12432 /* The value is not currently in the minipool, so we need to create
12433 a new entry for it. If MAX_MP is NULL, the entry will be put on
12434 the end of the list since the placement is less constrained than
12435 any existing entry. Otherwise, we insert the new fix before
12436 MAX_MP and, if necessary, adjust the constraints on the other
12437 entries. */
12438 mp = XNEW (Mnode);
12439 mp->fix_size = fix->fix_size;
12440 mp->mode = fix->mode;
12441 mp->value = fix->value;
12442 mp->refcount = 1;
12443 /* Not yet required for a backwards ref. */
12444 mp->min_address = -65536;
12446 if (max_mp == NULL)
12448 mp->max_address = max_address;
12449 mp->next = NULL;
12450 mp->prev = minipool_vector_tail;
12452 if (mp->prev == NULL)
12454 minipool_vector_head = mp;
12455 minipool_vector_label = gen_label_rtx ();
12457 else
12458 mp->prev->next = mp;
12460 minipool_vector_tail = mp;
12462 else
12464 if (max_address > max_mp->max_address - mp->fix_size)
12465 mp->max_address = max_mp->max_address - mp->fix_size;
12466 else
12467 mp->max_address = max_address;
12469 mp->next = max_mp;
12470 mp->prev = max_mp->prev;
12471 max_mp->prev = mp;
12472 if (mp->prev != NULL)
12473 mp->prev->next = mp;
12474 else
12475 minipool_vector_head = mp;
12478 /* Save the new entry. */
12479 max_mp = mp;
12481 /* Scan over the preceding entries and adjust their addresses as
12482 required. */
12483 while (mp->prev != NULL
12484 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12486 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12487 mp = mp->prev;
12490 return max_mp;
12493 static Mnode *
12494 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12495 HOST_WIDE_INT min_address)
12497 HOST_WIDE_INT offset;
12499 /* The code below assumes these are different. */
12500 gcc_assert (mp != min_mp);
12502 if (min_mp == NULL)
12504 if (min_address > mp->min_address)
12505 mp->min_address = min_address;
12507 else
12509 /* We will adjust this below if it is too loose. */
12510 mp->min_address = min_address;
12512 /* Unlink MP from its current position. Since min_mp is non-null,
12513 mp->next must be non-null. */
12514 mp->next->prev = mp->prev;
12515 if (mp->prev != NULL)
12516 mp->prev->next = mp->next;
12517 else
12518 minipool_vector_head = mp->next;
12520 /* Reinsert it after MIN_MP. */
12521 mp->prev = min_mp;
12522 mp->next = min_mp->next;
12523 min_mp->next = mp;
12524 if (mp->next != NULL)
12525 mp->next->prev = mp;
12526 else
12527 minipool_vector_tail = mp;
12530 min_mp = mp;
12532 offset = 0;
12533 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12535 mp->offset = offset;
12536 if (mp->refcount > 0)
12537 offset += mp->fix_size;
12539 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12540 mp->next->min_address = mp->min_address + mp->fix_size;
12543 return min_mp;
12546 /* Add a constant to the minipool for a backward reference. Returns the
12547 node added or NULL if the constant will not fit in this pool.
12549 Note that the code for insertion for a backwards reference can be
12550 somewhat confusing because the calculated offsets for each fix do
12551 not take into account the size of the pool (which is still under
12552 construction). */
12553 static Mnode *
12554 add_minipool_backward_ref (Mfix *fix)
12556 /* If set, min_mp is the last pool_entry that has a lower constraint
12557 than the one we are trying to add. */
12558 Mnode *min_mp = NULL;
12559 /* This can be negative, since it is only a constraint. */
12560 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12561 Mnode *mp;
12563 /* If we can't reach the current pool from this insn, or if we can't
12564 insert this entry at the end of the pool without pushing other
12565 fixes out of range, then we don't try. This ensures that we
12566 can't fail later on. */
12567 if (min_address >= minipool_barrier->address
12568 || (minipool_vector_tail->min_address + fix->fix_size
12569 >= minipool_barrier->address))
12570 return NULL;
12572 /* Scan the pool to see if a constant with the same value has
12573 already been added. While we are doing this, also note the
12574 location where we must insert the constant if it doesn't already
12575 exist. */
12576 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12578 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12579 && fix->mode == mp->mode
12580 && (GET_CODE (fix->value) != CODE_LABEL
12581 || (CODE_LABEL_NUMBER (fix->value)
12582 == CODE_LABEL_NUMBER (mp->value)))
12583 && rtx_equal_p (fix->value, mp->value)
12584 /* Check that there is enough slack to move this entry to the
12585 end of the table (this is conservative). */
12586 && (mp->max_address
12587 > (minipool_barrier->address
12588 + minipool_vector_tail->offset
12589 + minipool_vector_tail->fix_size)))
12591 mp->refcount++;
12592 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12595 if (min_mp != NULL)
12596 mp->min_address += fix->fix_size;
12597 else
12599 /* Note the insertion point if necessary. */
12600 if (mp->min_address < min_address)
12602 /* For now, we do not allow the insertion of nodes requiring
12603 8-byte alignment anywhere but at the start of the pool. */
12604 if (ARM_DOUBLEWORD_ALIGN
12605 && fix->fix_size >= 8 && mp->fix_size < 8)
12606 return NULL;
12607 else
12608 min_mp = mp;
12610 else if (mp->max_address
12611 < minipool_barrier->address + mp->offset + fix->fix_size)
12613 /* Inserting before this entry would push the fix beyond
12614 its maximum address (which can happen if we have
12615 re-located a forwards fix); force the new fix to come
12616 after it. */
12617 if (ARM_DOUBLEWORD_ALIGN
12618 && fix->fix_size >= 8 && mp->fix_size < 8)
12619 return NULL;
12620 else
12622 min_mp = mp;
12623 min_address = mp->min_address + fix->fix_size;
12626 /* Do not insert a non-8-byte aligned quantity before 8-byte
12627 aligned quantities. */
12628 else if (ARM_DOUBLEWORD_ALIGN
12629 && fix->fix_size < 8
12630 && mp->fix_size >= 8)
12632 min_mp = mp;
12633 min_address = mp->min_address + fix->fix_size;
12638 /* We need to create a new entry. */
12639 mp = XNEW (Mnode);
12640 mp->fix_size = fix->fix_size;
12641 mp->mode = fix->mode;
12642 mp->value = fix->value;
12643 mp->refcount = 1;
12644 mp->max_address = minipool_barrier->address + 65536;
12646 mp->min_address = min_address;
12648 if (min_mp == NULL)
12650 mp->prev = NULL;
12651 mp->next = minipool_vector_head;
12653 if (mp->next == NULL)
12655 minipool_vector_tail = mp;
12656 minipool_vector_label = gen_label_rtx ();
12658 else
12659 mp->next->prev = mp;
12661 minipool_vector_head = mp;
12663 else
12665 mp->next = min_mp->next;
12666 mp->prev = min_mp;
12667 min_mp->next = mp;
12669 if (mp->next != NULL)
12670 mp->next->prev = mp;
12671 else
12672 minipool_vector_tail = mp;
12675 /* Save the new entry. */
12676 min_mp = mp;
12678 if (mp->prev)
12679 mp = mp->prev;
12680 else
12681 mp->offset = 0;
12683 /* Scan over the following entries and adjust their offsets. */
12684 while (mp->next != NULL)
12686 if (mp->next->min_address < mp->min_address + mp->fix_size)
12687 mp->next->min_address = mp->min_address + mp->fix_size;
12689 if (mp->refcount)
12690 mp->next->offset = mp->offset + mp->fix_size;
12691 else
12692 mp->next->offset = mp->offset;
12694 mp = mp->next;
12697 return min_mp;
12700 static void
12701 assign_minipool_offsets (Mfix *barrier)
12703 HOST_WIDE_INT offset = 0;
12704 Mnode *mp;
12706 minipool_barrier = barrier;
12708 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12710 mp->offset = offset;
12712 if (mp->refcount > 0)
12713 offset += mp->fix_size;
12717 /* Output the literal table */
12718 static void
12719 dump_minipool (rtx scan)
12721 Mnode * mp;
12722 Mnode * nmp;
12723 int align64 = 0;
12725 if (ARM_DOUBLEWORD_ALIGN)
12726 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12727 if (mp->refcount > 0 && mp->fix_size >= 8)
12729 align64 = 1;
12730 break;
12733 if (dump_file)
12734 fprintf (dump_file,
12735 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12736 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12738 scan = emit_label_after (gen_label_rtx (), scan);
12739 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12740 scan = emit_label_after (minipool_vector_label, scan);
12742 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12744 if (mp->refcount > 0)
12746 if (dump_file)
12748 fprintf (dump_file,
12749 ";; Offset %u, min %ld, max %ld ",
12750 (unsigned) mp->offset, (unsigned long) mp->min_address,
12751 (unsigned long) mp->max_address);
12752 arm_print_value (dump_file, mp->value);
12753 fputc ('\n', dump_file);
12756 switch (mp->fix_size)
12758 #ifdef HAVE_consttable_1
12759 case 1:
12760 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12761 break;
12763 #endif
12764 #ifdef HAVE_consttable_2
12765 case 2:
12766 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12767 break;
12769 #endif
12770 #ifdef HAVE_consttable_4
12771 case 4:
12772 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12773 break;
12775 #endif
12776 #ifdef HAVE_consttable_8
12777 case 8:
12778 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12779 break;
12781 #endif
12782 #ifdef HAVE_consttable_16
12783 case 16:
12784 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12785 break;
12787 #endif
12788 default:
12789 gcc_unreachable ();
12793 nmp = mp->next;
12794 free (mp);
12797 minipool_vector_head = minipool_vector_tail = NULL;
12798 scan = emit_insn_after (gen_consttable_end (), scan);
12799 scan = emit_barrier_after (scan);
12802 /* Return the cost of forcibly inserting a barrier after INSN. */
12803 static int
12804 arm_barrier_cost (rtx insn)
12806 /* Basing the location of the pool on the loop depth is preferable,
12807 but at the moment, the basic block information seems to be
12808 corrupt by this stage of the compilation. */
12809 int base_cost = 50;
12810 rtx next = next_nonnote_insn (insn);
12812 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12813 base_cost -= 20;
12815 switch (GET_CODE (insn))
12817 case CODE_LABEL:
12818 /* It will always be better to place the table before the label, rather
12819 than after it. */
12820 return 50;
12822 case INSN:
12823 case CALL_INSN:
12824 return base_cost;
12826 case JUMP_INSN:
12827 return base_cost - 10;
12829 default:
12830 return base_cost + 10;
12834 /* Find the best place in the insn stream in the range
12835 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12836 Create the barrier by inserting a jump and add a new fix entry for
12837 it. */
12838 static Mfix *
12839 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12841 HOST_WIDE_INT count = 0;
12842 rtx barrier;
12843 rtx from = fix->insn;
12844 /* The instruction after which we will insert the jump. */
12845 rtx selected = NULL;
12846 int selected_cost;
12847 /* The address at which the jump instruction will be placed. */
12848 HOST_WIDE_INT selected_address;
12849 Mfix * new_fix;
12850 HOST_WIDE_INT max_count = max_address - fix->address;
12851 rtx label = gen_label_rtx ();
12853 selected_cost = arm_barrier_cost (from);
12854 selected_address = fix->address;
12856 while (from && count < max_count)
12858 rtx tmp;
12859 int new_cost;
12861 /* This code shouldn't have been called if there was a natural barrier
12862 within range. */
12863 gcc_assert (GET_CODE (from) != BARRIER);
12865 /* Count the length of this insn. This must stay in sync with the
12866 code that pushes minipool fixes. */
12867 if (LABEL_P (from))
12868 count += get_label_padding (from);
12869 else
12870 count += get_attr_length (from);
12872 /* If there is a jump table, add its length. */
12873 tmp = is_jump_table (from);
12874 if (tmp != NULL)
12876 count += get_jump_table_size (tmp);
12878 /* Jump tables aren't in a basic block, so base the cost on
12879 the dispatch insn. If we select this location, we will
12880 still put the pool after the table. */
12881 new_cost = arm_barrier_cost (from);
12883 if (count < max_count
12884 && (!selected || new_cost <= selected_cost))
12886 selected = tmp;
12887 selected_cost = new_cost;
12888 selected_address = fix->address + count;
12891 /* Continue after the dispatch table. */
12892 from = NEXT_INSN (tmp);
12893 continue;
12896 new_cost = arm_barrier_cost (from);
12898 if (count < max_count
12899 && (!selected || new_cost <= selected_cost))
12901 selected = from;
12902 selected_cost = new_cost;
12903 selected_address = fix->address + count;
12906 from = NEXT_INSN (from);
12909 /* Make sure that we found a place to insert the jump. */
12910 gcc_assert (selected);
12912 /* Make sure we do not split a call and its corresponding
12913 CALL_ARG_LOCATION note. */
12914 if (CALL_P (selected))
12916 rtx next = NEXT_INSN (selected);
12917 if (next && NOTE_P (next)
12918 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12919 selected = next;
12922 /* Create a new JUMP_INSN that branches around a barrier. */
12923 from = emit_jump_insn_after (gen_jump (label), selected);
12924 JUMP_LABEL (from) = label;
12925 barrier = emit_barrier_after (from);
12926 emit_label_after (label, barrier);
12928 /* Create a minipool barrier entry for the new barrier. */
12929 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12930 new_fix->insn = barrier;
12931 new_fix->address = selected_address;
12932 new_fix->next = fix->next;
12933 fix->next = new_fix;
12935 return new_fix;
12938 /* Record that there is a natural barrier in the insn stream at
12939 ADDRESS. */
12940 static void
12941 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12943 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12945 fix->insn = insn;
12946 fix->address = address;
12948 fix->next = NULL;
12949 if (minipool_fix_head != NULL)
12950 minipool_fix_tail->next = fix;
12951 else
12952 minipool_fix_head = fix;
12954 minipool_fix_tail = fix;
12957 /* Record INSN, which will need fixing up to load a value from the
12958 minipool. ADDRESS is the offset of the insn since the start of the
12959 function; LOC is a pointer to the part of the insn which requires
12960 fixing; VALUE is the constant that must be loaded, which is of type
12961 MODE. */
12962 static void
12963 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12964 enum machine_mode mode, rtx value)
12966 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12968 fix->insn = insn;
12969 fix->address = address;
12970 fix->loc = loc;
12971 fix->mode = mode;
12972 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12973 fix->value = value;
12974 fix->forwards = get_attr_pool_range (insn);
12975 fix->backwards = get_attr_neg_pool_range (insn);
12976 fix->minipool = NULL;
12978 /* If an insn doesn't have a range defined for it, then it isn't
12979 expecting to be reworked by this code. Better to stop now than
12980 to generate duff assembly code. */
12981 gcc_assert (fix->forwards || fix->backwards);
12983 /* If an entry requires 8-byte alignment then assume all constant pools
12984 require 4 bytes of padding. Trying to do this later on a per-pool
12985 basis is awkward because existing pool entries have to be modified. */
12986 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12987 minipool_pad = 4;
12989 if (dump_file)
12991 fprintf (dump_file,
12992 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12993 GET_MODE_NAME (mode),
12994 INSN_UID (insn), (unsigned long) address,
12995 -1 * (long)fix->backwards, (long)fix->forwards);
12996 arm_print_value (dump_file, fix->value);
12997 fprintf (dump_file, "\n");
13000 /* Add it to the chain of fixes. */
13001 fix->next = NULL;
13003 if (minipool_fix_head != NULL)
13004 minipool_fix_tail->next = fix;
13005 else
13006 minipool_fix_head = fix;
13008 minipool_fix_tail = fix;
13011 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13012 Returns the number of insns needed, or 99 if we don't know how to
13013 do it. */
13014 int
13015 arm_const_double_inline_cost (rtx val)
13017 rtx lowpart, highpart;
13018 enum machine_mode mode;
13020 mode = GET_MODE (val);
13022 if (mode == VOIDmode)
13023 mode = DImode;
13025 gcc_assert (GET_MODE_SIZE (mode) == 8);
13027 lowpart = gen_lowpart (SImode, val);
13028 highpart = gen_highpart_mode (SImode, mode, val);
13030 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13031 gcc_assert (GET_CODE (highpart) == CONST_INT);
13033 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13034 NULL_RTX, NULL_RTX, 0, 0)
13035 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13036 NULL_RTX, NULL_RTX, 0, 0));
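/* Illustrative costs (the exact numbers depend on the target options seen
   by arm_gen_constant): a value such as 0x0000000100000001 splits into
   two halves of 1, each a single MOV, for a total cost of 2, whereas
   halves that need MOV/ORR chains or an inline MOVW/MOVT pair cost
   correspondingly more.  */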
13039 /* Return true if it is worthwhile to split a 64-bit constant into two
13040 32-bit operations. This is the case if optimizing for size, or
13041 if we have load delay slots, or if one 32-bit part can be done with
13042 a single data operation. */
13043 bool
13044 arm_const_double_by_parts (rtx val)
13046 enum machine_mode mode = GET_MODE (val);
13047 rtx part;
13049 if (optimize_size || arm_ld_sched)
13050 return true;
13052 if (mode == VOIDmode)
13053 mode = DImode;
13055 part = gen_highpart_mode (SImode, mode, val);
13057 gcc_assert (GET_CODE (part) == CONST_INT);
13059 if (const_ok_for_arm (INTVAL (part))
13060 || const_ok_for_arm (~INTVAL (part)))
13061 return true;
13063 part = gen_lowpart (SImode, val);
13065 gcc_assert (GET_CODE (part) == CONST_INT);
13067 if (const_ok_for_arm (INTVAL (part))
13068 || const_ok_for_arm (~INTVAL (part)))
13069 return true;
13071 return false;
13074 /* Return true if it is possible to inline both the high and low parts
13075 of a 64-bit constant into 32-bit data processing instructions. */
13076 bool
13077 arm_const_double_by_immediates (rtx val)
13079 enum machine_mode mode = GET_MODE (val);
13080 rtx part;
13082 if (mode == VOIDmode)
13083 mode = DImode;
13085 part = gen_highpart_mode (SImode, mode, val);
13087 gcc_assert (GET_CODE (part) == CONST_INT);
13089 if (!const_ok_for_arm (INTVAL (part)))
13090 return false;
13092 part = gen_lowpart (SImode, val);
13094 gcc_assert (GET_CODE (part) == CONST_INT);
13096 if (!const_ok_for_arm (INTVAL (part)))
13097 return false;
13099 return true;
13102 /* Scan INSN and note any of its operands that need fixing.
13103 If DO_PUSHES is false we do not actually push any of the fixups
13104 needed. */
13105 static void
13106 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13108 int opno;
13110 extract_insn (insn);
13112 if (!constrain_operands (1))
13113 fatal_insn_not_found (insn);
13115 if (recog_data.n_alternatives == 0)
13116 return;
13118 /* Fill in recog_op_alt with information about the constraints of
13119 this insn. */
13120 preprocess_constraints ();
13122 for (opno = 0; opno < recog_data.n_operands; opno++)
13124 /* Things we need to fix can only occur in inputs. */
13125 if (recog_data.operand_type[opno] != OP_IN)
13126 continue;
13128 /* If this alternative is a memory reference, then any mention
13129 of constants in this alternative is really to fool reload
13130 into allowing us to accept one there. We need to fix them up
13131 now so that we output the right code. */
13132 if (recog_op_alt[opno][which_alternative].memory_ok)
13134 rtx op = recog_data.operand[opno];
13136 if (CONSTANT_P (op))
13138 if (do_pushes)
13139 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13140 recog_data.operand_mode[opno], op);
13142 else if (GET_CODE (op) == MEM
13143 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13144 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13146 if (do_pushes)
13148 rtx cop = avoid_constant_pool_reference (op);
13150 /* Casting the address of something to a mode narrower
13151 than a word can cause avoid_constant_pool_reference()
13152 to return the pool reference itself. That's no good to
13153 us here. Let's just hope that we can use the
13154 constant pool value directly. */
13155 if (op == cop)
13156 cop = get_pool_constant (XEXP (op, 0));
13158 push_minipool_fix (insn, address,
13159 recog_data.operand_loc[opno],
13160 recog_data.operand_mode[opno], cop);
13167 return;
13170 /* Convert instructions to their cc-clobbering variant if possible, since
13171 that allows us to use smaller encodings. */
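/* For instance (illustrative, following the per-opcode comments below):
   "SUBS <Rd>,<Rn>,#<imm3>" has a 16-bit Thumb encoding, while the
   non-flag-setting "SUB.W <Rd>,<Rn>,#<const>" needs 32 bits.  When the
   condition codes are known to be dead at this point, rewriting the SET
   into a PARALLEL with a CC clobber lets such instructions shrink.  */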
13173 static void
13174 thumb2_reorg (void)
13176 basic_block bb;
13177 regset_head live;
13179 INIT_REG_SET (&live);
13181 /* We are freeing block_for_insn in the toplev to keep compatibility
13182 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13183 compute_bb_for_insn ();
13184 df_analyze ();
13186 FOR_EACH_BB (bb)
13188 rtx insn;
13190 COPY_REG_SET (&live, DF_LR_OUT (bb));
13191 df_simulate_initialize_backwards (bb, &live);
13192 FOR_BB_INSNS_REVERSE (bb, insn)
13194 if (NONJUMP_INSN_P (insn)
13195 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13196 && GET_CODE (PATTERN (insn)) == SET)
13198 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13199 rtx pat = PATTERN (insn);
13200 rtx dst = XEXP (pat, 0);
13201 rtx src = XEXP (pat, 1);
13202 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13204 if (!OBJECT_P (src))
13205 op0 = XEXP (src, 0);
13207 if (BINARY_P (src))
13208 op1 = XEXP (src, 1);
13210 if (low_register_operand (dst, SImode))
13212 switch (GET_CODE (src))
13214 case PLUS:
13215 if (low_register_operand (op0, SImode))
13217 /* ADDS <Rd>,<Rn>,<Rm> */
13218 if (low_register_operand (op1, SImode))
13219 action = CONV;
13220 /* ADDS <Rdn>,#<imm8> */
13221 /* SUBS <Rdn>,#<imm8> */
13222 else if (rtx_equal_p (dst, op0)
13223 && CONST_INT_P (op1)
13224 && IN_RANGE (INTVAL (op1), -255, 255))
13225 action = CONV;
13226 /* ADDS <Rd>,<Rn>,#<imm3> */
13227 /* SUBS <Rd>,<Rn>,#<imm3> */
13228 else if (CONST_INT_P (op1)
13229 && IN_RANGE (INTVAL (op1), -7, 7))
13230 action = CONV;
13232 break;
13234 case MINUS:
13235 /* RSBS <Rd>,<Rn>,#0
13236 Not handled here: see NEG below. */
13237 /* SUBS <Rd>,<Rn>,#<imm3>
13238 SUBS <Rdn>,#<imm8>
13239 Not handled here: see PLUS above. */
13240 /* SUBS <Rd>,<Rn>,<Rm> */
13241 if (low_register_operand (op0, SImode)
13242 && low_register_operand (op1, SImode))
13243 action = CONV;
13244 break;
13246 case MULT:
13247 /* MULS <Rdm>,<Rn>,<Rdm>
13248 As an exception to the rule, this is only used
13249 when optimizing for size since MULS is slow on all
13250 known implementations. We do not even want to use
13251 MULS in cold code, if optimizing for speed, so we
13252 test the global flag here. */
13253 if (!optimize_size)
13254 break;
13255 /* else fall through. */
13256 case AND:
13257 case IOR:
13258 case XOR:
13259 /* ANDS <Rdn>,<Rm> */
13260 if (rtx_equal_p (dst, op0)
13261 && low_register_operand (op1, SImode))
13262 action = CONV;
13263 else if (rtx_equal_p (dst, op1)
13264 && low_register_operand (op0, SImode))
13265 action = SWAP_CONV;
13266 break;
13268 case ASHIFTRT:
13269 case ASHIFT:
13270 case LSHIFTRT:
13271 /* ASRS <Rdn>,<Rm> */
13272 /* LSRS <Rdn>,<Rm> */
13273 /* LSLS <Rdn>,<Rm> */
13274 if (rtx_equal_p (dst, op0)
13275 && low_register_operand (op1, SImode))
13276 action = CONV;
13277 /* ASRS <Rd>,<Rm>,#<imm5> */
13278 /* LSRS <Rd>,<Rm>,#<imm5> */
13279 /* LSLS <Rd>,<Rm>,#<imm5> */
13280 else if (low_register_operand (op0, SImode)
13281 && CONST_INT_P (op1)
13282 && IN_RANGE (INTVAL (op1), 0, 31))
13283 action = CONV;
13284 break;
13286 case ROTATERT:
13287 /* RORS <Rdn>,<Rm> */
13288 if (rtx_equal_p (dst, op0)
13289 && low_register_operand (op1, SImode))
13290 action = CONV;
13291 break;
13293 case NOT:
13294 case NEG:
13295 /* MVNS <Rd>,<Rm> */
13296 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13297 if (low_register_operand (op0, SImode))
13298 action = CONV;
13299 break;
13301 case CONST_INT:
13302 /* MOVS <Rd>,#<imm8> */
13303 if (CONST_INT_P (src)
13304 && IN_RANGE (INTVAL (src), 0, 255))
13305 action = CONV;
13306 break;
13308 case REG:
13309 /* MOVS and MOV<c> with registers have different
13310 encodings, so are not relevant here. */
13311 break;
13313 default:
13314 break;
13318 if (action != SKIP)
13320 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13321 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13322 rtvec vec;
13324 if (action == SWAP_CONV)
13326 src = copy_rtx (src);
13327 XEXP (src, 0) = op1;
13328 XEXP (src, 1) = op0;
13329 pat = gen_rtx_SET (VOIDmode, dst, src);
13330 vec = gen_rtvec (2, pat, clobber);
13332 else /* action == CONV */
13333 vec = gen_rtvec (2, pat, clobber);
13335 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13336 INSN_CODE (insn) = -1;
13340 if (NONDEBUG_INSN_P (insn))
13341 df_simulate_one_insn_backwards (bb, insn, &live);
13345 CLEAR_REG_SET (&live);
13348 /* Gcc puts the pool in the wrong place for ARM, since we can only
13349 load addresses a limited distance around the pc. We do some
13350 special munging to move the constant pool values to the correct
13351 point in the code. */
13352 static void
13353 arm_reorg (void)
13355 rtx insn;
13356 HOST_WIDE_INT address = 0;
13357 Mfix * fix;
13359 if (TARGET_THUMB2)
13360 thumb2_reorg ();
13362 minipool_fix_head = minipool_fix_tail = NULL;
13364 /* The first insn must always be a note, or the code below won't
13365 scan it properly. */
13366 insn = get_insns ();
13367 gcc_assert (GET_CODE (insn) == NOTE);
13368 minipool_pad = 0;
13370 /* Scan all the insns and record the operands that will need fixing. */
13371 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13373 if (GET_CODE (insn) == BARRIER)
13374 push_minipool_barrier (insn, address);
13375 else if (INSN_P (insn))
13377 rtx table;
13379 note_invalid_constants (insn, address, true);
13380 address += get_attr_length (insn);
13382 /* If the insn is a vector jump, add the size of the table
13383 and skip the table. */
13384 if ((table = is_jump_table (insn)) != NULL)
13386 address += get_jump_table_size (table);
13387 insn = table;
13390 else if (LABEL_P (insn))
13391 /* Add the worst-case padding due to alignment. We don't add
13392 the _current_ padding because the minipool insertions
13393 themselves might change it. */
13394 address += get_label_padding (insn);
13397 fix = minipool_fix_head;
13399 /* Now scan the fixups and perform the required changes. */
13400 while (fix)
13402 Mfix * ftmp;
13403 Mfix * fdel;
13404 Mfix * last_added_fix;
13405 Mfix * last_barrier = NULL;
13406 Mfix * this_fix;
13408 /* Skip any further barriers before the next fix. */
13409 while (fix && GET_CODE (fix->insn) == BARRIER)
13410 fix = fix->next;
13412 /* No more fixes. */
13413 if (fix == NULL)
13414 break;
13416 last_added_fix = NULL;
13418 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13420 if (GET_CODE (ftmp->insn) == BARRIER)
13422 if (ftmp->address >= minipool_vector_head->max_address)
13423 break;
13425 last_barrier = ftmp;
13427 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13428 break;
13430 last_added_fix = ftmp; /* Keep track of the last fix added. */
13433 /* If we found a barrier, drop back to that; any fixes that we
13434 could have reached but come after the barrier will now go in
13435 the next mini-pool. */
13436 if (last_barrier != NULL)
13438 /* Reduce the refcount for those fixes that won't go into this
13439 pool after all. */
13440 for (fdel = last_barrier->next;
13441 fdel && fdel != ftmp;
13442 fdel = fdel->next)
13444 fdel->minipool->refcount--;
13445 fdel->minipool = NULL;
13448 ftmp = last_barrier;
13450 else
13452 /* ftmp is the first fix that we can't fit into this pool and
13453 there are no natural barriers that we could use. Insert a
13454 new barrier in the code somewhere between the previous
13455 fix and this one, and arrange to jump around it. */
13456 HOST_WIDE_INT max_address;
13458 /* The last item on the list of fixes must be a barrier, so
13459 we can never run off the end of the list of fixes without
13460 last_barrier being set. */
13461 gcc_assert (ftmp);
13463 max_address = minipool_vector_head->max_address;
13464 /* Check that there isn't another fix that is in range that
13465 we couldn't fit into this pool because the pool was
13466 already too large: we need to put the pool before such an
13467 instruction. The pool itself may come just after the
13468 fix because create_fix_barrier also allows space for a
13469 jump instruction. */
13470 if (ftmp->address < max_address)
13471 max_address = ftmp->address + 1;
13473 last_barrier = create_fix_barrier (last_added_fix, max_address);
13476 assign_minipool_offsets (last_barrier);
13478 while (ftmp)
13480 if (GET_CODE (ftmp->insn) != BARRIER
13481 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13482 == NULL))
13483 break;
13485 ftmp = ftmp->next;
13488 /* Scan over the fixes we have identified for this pool, fixing them
13489 up and adding the constants to the pool itself. */
13490 for (this_fix = fix; this_fix && ftmp != this_fix;
13491 this_fix = this_fix->next)
13492 if (GET_CODE (this_fix->insn) != BARRIER)
13494 rtx addr
13495 = plus_constant (Pmode,
13496 gen_rtx_LABEL_REF (VOIDmode,
13497 minipool_vector_label),
13498 this_fix->minipool->offset);
13499 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13502 dump_minipool (last_barrier->insn);
13503 fix = ftmp;
13506 /* From now on we must synthesize any constants that we can't handle
13507 directly. This can happen if the RTL gets split during final
13508 instruction generation. */
13509 after_arm_reorg = 1;
13511 /* Free the minipool memory. */
13512 obstack_free (&minipool_obstack, minipool_startobj);
13515 /* Routines to output assembly language. */
13517 /* If the rtx is the correct value then return the string of the number.
13518 In this way we can ensure that valid double constants are generated even
13519 when cross compiling. */
13520 const char *
13521 fp_immediate_constant (rtx x)
13523 REAL_VALUE_TYPE r;
13525 if (!fp_consts_inited)
13526 init_fp_table ();
13528 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13530 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13531 return "0";
13534 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13535 static const char *
13536 fp_const_from_val (REAL_VALUE_TYPE *r)
13538 if (!fp_consts_inited)
13539 init_fp_table ();
13541 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13542 return "0";
13545 /* OPERANDS[0] is the entire list of insns that constitute pop,
13546 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13547 is in the list, UPDATE is true iff the list contains explicit
13548 update of base register. */
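/* For example (illustrative operands): popping r4, r5 and the return
   address with an SP base in unified syntax produces "pop {r4, r5, pc}",
   while a non-SP base without writeback produces
   "ldmia r6, {r4, r5, pc}".  */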
13549 void
13550 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13551 bool update)
13553 int i;
13554 char pattern[100];
13555 int offset;
13556 const char *conditional;
13557 int num_saves = XVECLEN (operands[0], 0);
13558 unsigned int regno;
13559 unsigned int regno_base = REGNO (operands[1]);
13561 offset = 0;
13562 offset += update ? 1 : 0;
13563 offset += return_pc ? 1 : 0;
13565 /* Is the base register in the list? */
13566 for (i = offset; i < num_saves; i++)
13568 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13569 /* If SP is in the list, then the base register must be SP. */
13570 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13571 /* If base register is in the list, there must be no explicit update. */
13572 if (regno == regno_base)
13573 gcc_assert (!update);
13576 conditional = reverse ? "%?%D0" : "%?%d0";
13577 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13579 /* Output pop (not stmfd) because it has a shorter encoding. */
13580 gcc_assert (update);
13581 sprintf (pattern, "pop%s\t{", conditional);
13583 else
13585 /* Output ldmfd when the base register is SP; otherwise output ldmia.
13586 It's just a convention; their semantics are identical.
13587 if (regno_base == SP_REGNUM)
13588 sprintf (pattern, "ldm%sfd\t", conditional);
13589 else if (TARGET_UNIFIED_ASM)
13590 sprintf (pattern, "ldmia%s\t", conditional);
13591 else
13592 sprintf (pattern, "ldm%sia\t", conditional);
13594 strcat (pattern, reg_names[regno_base]);
13595 if (update)
13596 strcat (pattern, "!, {");
13597 else
13598 strcat (pattern, ", {");
13601 /* Output the first destination register. */
13602 strcat (pattern,
13603 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13605 /* Output the rest of the destination registers. */
13606 for (i = offset + 1; i < num_saves; i++)
13608 strcat (pattern, ", ");
13609 strcat (pattern,
13610 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13613 strcat (pattern, "}");
13615 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13616 strcat (pattern, "^");
13618 output_asm_insn (pattern, &cond);
13622 /* Output the assembly for a store multiple. */
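/* A minimal illustration (assumed operands): for a block of three
   register pairs starting at d8 this emits something like
   "fstmfdd sp!, {d8, d9, d10}".  */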
13624 const char *
13625 vfp_output_fstmd (rtx * operands)
13627 char pattern[100];
13628 int p;
13629 int base;
13630 int i;
13632 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13633 p = strlen (pattern);
13635 gcc_assert (GET_CODE (operands[1]) == REG);
13637 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13638 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13640 p += sprintf (&pattern[p], ", d%d", base + i);
13642 strcpy (&pattern[p], "}");
13644 output_asm_insn (pattern, operands);
13645 return "";
13649 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13650 number of bytes pushed. */
13652 static int
13653 vfp_emit_fstmd (int base_reg, int count)
13655 rtx par;
13656 rtx dwarf;
13657 rtx tmp, reg;
13658 int i;
13660 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13661 register pairs are stored by a store multiple insn. We avoid this
13662 by pushing an extra pair. */
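/* For instance, a request to push exactly {d8, d9} is widened to
   {d8, d9, d10}; when the pair sits at the top of the register file
   the block is extended downwards instead.  */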
13663 if (count == 2 && !arm_arch6)
13665 if (base_reg == LAST_VFP_REGNUM - 3)
13666 base_reg -= 2;
13667 count++;
13670 /* FSTMD may not store more than 16 doubleword registers at once. Split
13671 larger stores into multiple parts (up to a maximum of two, in
13672 practice). */
13673 if (count > 16)
13675 int saved;
13676 /* NOTE: base_reg is an internal register number, so each D register
13677 counts as 2. */
13678 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13679 saved += vfp_emit_fstmd (base_reg, 16);
13680 return saved;
13683 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13684 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13686 reg = gen_rtx_REG (DFmode, base_reg);
13687 base_reg += 2;
13689 XVECEXP (par, 0, 0)
13690 = gen_rtx_SET (VOIDmode,
13691 gen_frame_mem
13692 (BLKmode,
13693 gen_rtx_PRE_MODIFY (Pmode,
13694 stack_pointer_rtx,
13695 plus_constant
13696 (Pmode, stack_pointer_rtx,
13697 - (count * 8)))
13699 gen_rtx_UNSPEC (BLKmode,
13700 gen_rtvec (1, reg),
13701 UNSPEC_PUSH_MULT));
13703 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13704 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13705 RTX_FRAME_RELATED_P (tmp) = 1;
13706 XVECEXP (dwarf, 0, 0) = tmp;
13708 tmp = gen_rtx_SET (VOIDmode,
13709 gen_frame_mem (DFmode, stack_pointer_rtx),
13710 reg);
13711 RTX_FRAME_RELATED_P (tmp) = 1;
13712 XVECEXP (dwarf, 0, 1) = tmp;
13714 for (i = 1; i < count; i++)
13716 reg = gen_rtx_REG (DFmode, base_reg);
13717 base_reg += 2;
13718 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13720 tmp = gen_rtx_SET (VOIDmode,
13721 gen_frame_mem (DFmode,
13722 plus_constant (Pmode,
13723 stack_pointer_rtx,
13724 i * 8)),
13725 reg);
13726 RTX_FRAME_RELATED_P (tmp) = 1;
13727 XVECEXP (dwarf, 0, i + 1) = tmp;
13730 par = emit_insn (par);
13731 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13732 RTX_FRAME_RELATED_P (par) = 1;
13734 return count * 8;
13737 /* Emit a call instruction with pattern PAT. ADDR is the address of
13738 the call target. */
13740 void
13741 arm_emit_call_insn (rtx pat, rtx addr)
13743 rtx insn;
13745 insn = emit_call_insn (pat);
13747 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13748 If the call might use such an entry, add a use of the PIC register
13749 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13750 if (TARGET_VXWORKS_RTP
13751 && flag_pic
13752 && GET_CODE (addr) == SYMBOL_REF
13753 && (SYMBOL_REF_DECL (addr)
13754 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13755 : !SYMBOL_REF_LOCAL_P (addr)))
13757 require_pic_register ();
13758 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13762 /* Output a 'call' insn. */
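/* Typical output (illustrative): for a call through r3 on a pre-ARMv5
   core this emits "mov lr, pc" followed by "bx r3" (with interworking
   or on ARMv4T) or "mov pc, r3" otherwise.  */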
13763 const char *
13764 output_call (rtx *operands)
13766 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13768 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13769 if (REGNO (operands[0]) == LR_REGNUM)
13771 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13772 output_asm_insn ("mov%?\t%0, %|lr", operands);
13775 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13777 if (TARGET_INTERWORK || arm_arch4t)
13778 output_asm_insn ("bx%?\t%0", operands);
13779 else
13780 output_asm_insn ("mov%?\t%|pc, %0", operands);
13782 return "";
13785 /* Output a 'call' insn that is a reference in memory. This is
13786 disabled for ARMv5 because we prefer to use blx instead; otherwise
13787 there's a significant performance overhead. */
13788 const char *
13789 output_call_mem (rtx *operands)
13791 gcc_assert (!arm_arch5);
13792 if (TARGET_INTERWORK)
13794 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13795 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13796 output_asm_insn ("bx%?\t%|ip", operands);
13798 else if (regno_use_in (LR_REGNUM, operands[0]))
13800 /* LR is used in the memory address. We load the address in the
13801 first instruction. It's safe to use IP as the target of the
13802 load since the call will kill it anyway. */
13803 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13804 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13805 if (arm_arch4t)
13806 output_asm_insn ("bx%?\t%|ip", operands);
13807 else
13808 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13810 else
13812 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13813 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13816 return "";
13820 /* Output a move from arm registers to arm registers of a long double
13821 OPERANDS[0] is the destination.
13822 OPERANDS[1] is the source. */
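/* For example (illustrative register numbers): copying {r1, r2, r3} into
   {r2, r3, r4} must move the highest word first so the source is not
   clobbered; copying in the other direction moves the lowest word first.  */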
13823 const char *
13824 output_mov_long_double_arm_from_arm (rtx *operands)
13826 /* We have to be careful here because the two might overlap. */
13827 int dest_start = REGNO (operands[0]);
13828 int src_start = REGNO (operands[1]);
13829 rtx ops[2];
13830 int i;
13832 if (dest_start < src_start)
13834 for (i = 0; i < 3; i++)
13836 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13837 ops[1] = gen_rtx_REG (SImode, src_start + i);
13838 output_asm_insn ("mov%?\t%0, %1", ops);
13841 else
13843 for (i = 2; i >= 0; i--)
13845 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13846 ops[1] = gen_rtx_REG (SImode, src_start + i);
13847 output_asm_insn ("mov%?\t%0, %1", ops);
13851 return "";
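/* Emit a movw/movt-style pair loading SRC into DEST: the low halfword is
   set first and the high halfword is inserted only when it is non-zero.
   Illustratively (assuming the constant 0x12345678), this is roughly
   "movw dest, #0x5678" followed by "movt dest, #0x1234"; a non-constant
   source is emitted as a HIGH/LO_SUM pair instead.  */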
13854 void
13855 arm_emit_movpair (rtx dest, rtx src)
13857 /* If the src is an immediate, simplify it. */
13858 if (CONST_INT_P (src))
13860 HOST_WIDE_INT val = INTVAL (src);
13861 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13862 if ((val >> 16) & 0x0000ffff)
13863 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13864 GEN_INT (16)),
13865 GEN_INT ((val >> 16) & 0x0000ffff));
13866 return;
13868 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13869 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13872 /* Output a move between double words. It must be REG<-MEM
13873 or MEM<-REG. */
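/* A couple of illustrative outputs (assumed operands): a doubleword load
   from [r2] into r0/r1 is emitted as a single "ldrd" when LDRD is
   available, or as "ldmia r2, {r0-r1}" otherwise; stores are symmetrical
   with strd/stmia.  */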
13874 const char *
13875 output_move_double (rtx *operands, bool emit, int *count)
13877 enum rtx_code code0 = GET_CODE (operands[0]);
13878 enum rtx_code code1 = GET_CODE (operands[1]);
13879 rtx otherops[3];
13880 if (count)
13881 *count = 1;
13883 /* The only case when this might happen is when
13884 you are looking at the length of a DImode instruction
13885 that has an invalid constant in it. */
13886 if (code0 == REG && code1 != MEM)
13888 gcc_assert (!emit);
13889 *count = 2;
13890 return "";
13893 if (code0 == REG)
13895 unsigned int reg0 = REGNO (operands[0]);
13897 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13899 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13901 switch (GET_CODE (XEXP (operands[1], 0)))
13903 case REG:
13905 if (emit)
13907 if (TARGET_LDRD
13908 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13909 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13910 else
13911 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13913 break;
13915 case PRE_INC:
13916 gcc_assert (TARGET_LDRD);
13917 if (emit)
13918 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13919 break;
13921 case PRE_DEC:
13922 if (emit)
13924 if (TARGET_LDRD)
13925 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13926 else
13927 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13929 break;
13931 case POST_INC:
13932 if (emit)
13934 if (TARGET_LDRD)
13935 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13936 else
13937 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13939 break;
13941 case POST_DEC:
13942 gcc_assert (TARGET_LDRD);
13943 if (emit)
13944 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13945 break;
13947 case PRE_MODIFY:
13948 case POST_MODIFY:
13949 /* Autoincrement addressing modes should never have overlapping
13950 base and destination registers, and overlapping index registers
13951 are already prohibited, so this doesn't need to worry about
13952 fix_cm3_ldrd. */
13953 otherops[0] = operands[0];
13954 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13955 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13957 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13959 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13961 /* Registers overlap so split out the increment. */
13962 if (emit)
13964 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13965 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13967 if (count)
13968 *count = 2;
13970 else
13972 /* Use a single insn if we can.
13973 FIXME: IWMMXT allows offsets larger than ldrd can
13974 handle, fix these up with a pair of ldr. */
13975 if (TARGET_THUMB2
13976 || GET_CODE (otherops[2]) != CONST_INT
13977 || (INTVAL (otherops[2]) > -256
13978 && INTVAL (otherops[2]) < 256))
13980 if (emit)
13981 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13983 else
13985 if (emit)
13987 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13988 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13990 if (count)
13991 *count = 2;
13996 else
13998 /* Use a single insn if we can.
13999 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14000 fix these up with a pair of ldr. */
14001 if (TARGET_THUMB2
14002 || GET_CODE (otherops[2]) != CONST_INT
14003 || (INTVAL (otherops[2]) > -256
14004 && INTVAL (otherops[2]) < 256))
14006 if (emit)
14007 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14009 else
14011 if (emit)
14013 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14014 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14016 if (count)
14017 *count = 2;
14020 break;
14022 case LABEL_REF:
14023 case CONST:
14024 /* We might be able to use ldrd %0, %1 here. However, the range is
14025 different from that of ldr/adr, and it is broken on some ARMv7-M
14026 implementations. */
14027 /* Use the second register of the pair to avoid problematic
14028 overlap. */
14029 otherops[1] = operands[1];
14030 if (emit)
14031 output_asm_insn ("adr%?\t%0, %1", otherops);
14032 operands[1] = otherops[0];
14033 if (emit)
14035 if (TARGET_LDRD)
14036 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14037 else
14038 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14041 if (count)
14042 *count = 2;
14043 break;
14045 /* ??? This needs checking for thumb2. */
14046 default:
14047 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14048 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14050 otherops[0] = operands[0];
14051 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14052 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14054 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14056 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14058 switch ((int) INTVAL (otherops[2]))
14060 case -8:
14061 if (emit)
14062 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14063 return "";
14064 case -4:
14065 if (TARGET_THUMB2)
14066 break;
14067 if (emit)
14068 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14069 return "";
14070 case 4:
14071 if (TARGET_THUMB2)
14072 break;
14073 if (emit)
14074 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14075 return "";
14078 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14079 operands[1] = otherops[0];
14080 if (TARGET_LDRD
14081 && (GET_CODE (otherops[2]) == REG
14082 || TARGET_THUMB2
14083 || (GET_CODE (otherops[2]) == CONST_INT
14084 && INTVAL (otherops[2]) > -256
14085 && INTVAL (otherops[2]) < 256)))
14087 if (reg_overlap_mentioned_p (operands[0],
14088 otherops[2]))
14090 rtx tmp;
14091 /* Swap base and index registers over to
14092 avoid a conflict. */
14093 tmp = otherops[1];
14094 otherops[1] = otherops[2];
14095 otherops[2] = tmp;
14097 /* If both registers conflict, it will usually
14098 have been fixed by a splitter. */
14099 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14100 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14102 if (emit)
14104 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14105 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14107 if (count)
14108 *count = 2;
14110 else
14112 otherops[0] = operands[0];
14113 if (emit)
14114 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14116 return "";
14119 if (GET_CODE (otherops[2]) == CONST_INT)
14121 if (emit)
14123 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14124 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14125 else
14126 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14129 else
14131 if (emit)
14132 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14135 else
14137 if (emit)
14138 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14141 if (count)
14142 *count = 2;
14144 if (TARGET_LDRD)
14145 return "ldr%(d%)\t%0, [%1]";
14147 return "ldm%(ia%)\t%1, %M0";
14149 else
14151 otherops[1] = adjust_address (operands[1], SImode, 4);
14152 /* Take care of overlapping base/data reg. */
14153 if (reg_mentioned_p (operands[0], operands[1]))
14155 if (emit)
14157 output_asm_insn ("ldr%?\t%0, %1", otherops);
14158 output_asm_insn ("ldr%?\t%0, %1", operands);
14160 if (count)
14161 *count = 2;
14164 else
14166 if (emit)
14168 output_asm_insn ("ldr%?\t%0, %1", operands);
14169 output_asm_insn ("ldr%?\t%0, %1", otherops);
14171 if (count)
14172 *count = 2;
14177 else
14179 /* Constraints should ensure this. */
14180 gcc_assert (code0 == MEM && code1 == REG);
14181 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14183 switch (GET_CODE (XEXP (operands[0], 0)))
14185 case REG:
14186 if (emit)
14188 if (TARGET_LDRD)
14189 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14190 else
14191 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14193 break;
14195 case PRE_INC:
14196 gcc_assert (TARGET_LDRD);
14197 if (emit)
14198 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14199 break;
14201 case PRE_DEC:
14202 if (emit)
14204 if (TARGET_LDRD)
14205 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14206 else
14207 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14209 break;
14211 case POST_INC:
14212 if (emit)
14214 if (TARGET_LDRD)
14215 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14216 else
14217 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14219 break;
14221 case POST_DEC:
14222 gcc_assert (TARGET_LDRD);
14223 if (emit)
14224 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14225 break;
14227 case PRE_MODIFY:
14228 case POST_MODIFY:
14229 otherops[0] = operands[1];
14230 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14231 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14233 /* IWMMXT allows offsets larger than ldrd can handle, so fix
14234 these up with a pair of ldr. */
14235 if (!TARGET_THUMB2
14236 && GET_CODE (otherops[2]) == CONST_INT
14237 && (INTVAL(otherops[2]) <= -256
14238 || INTVAL(otherops[2]) >= 256))
14240 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14242 if (emit)
14244 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14245 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14247 if (count)
14248 *count = 2;
14250 else
14252 if (emit)
14254 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14255 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14257 if (count)
14258 *count = 2;
14261 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14263 if (emit)
14264 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14266 else
14268 if (emit)
14269 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14271 break;
14273 case PLUS:
14274 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14275 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14277 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14279 case -8:
14280 if (emit)
14281 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14282 return "";
14284 case -4:
14285 if (TARGET_THUMB2)
14286 break;
14287 if (emit)
14288 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14289 return "";
14291 case 4:
14292 if (TARGET_THUMB2)
14293 break;
14294 if (emit)
14295 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14296 return "";
14299 if (TARGET_LDRD
14300 && (GET_CODE (otherops[2]) == REG
14301 || TARGET_THUMB2
14302 || (GET_CODE (otherops[2]) == CONST_INT
14303 && INTVAL (otherops[2]) > -256
14304 && INTVAL (otherops[2]) < 256)))
14306 otherops[0] = operands[1];
14307 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14308 if (emit)
14309 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14310 return "";
14312 /* Fall through */
14314 default:
14315 otherops[0] = adjust_address (operands[0], SImode, 4);
14316 otherops[1] = operands[1];
14317 if (emit)
14319 output_asm_insn ("str%?\t%1, %0", operands);
14320 output_asm_insn ("str%?\t%H1, %0", otherops);
14322 if (count)
14323 *count = 2;
14327 return "";
14330 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14331 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
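/* Illustratively, a quad-word load from the address in r0 into r4..r7
   comes out roughly as "ldmia r0, {r4-r7}"; register-to-register moves
   fall back to four mov instructions ordered to avoid overlap.  */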
14333 const char *
14334 output_move_quad (rtx *operands)
14336 if (REG_P (operands[0]))
14338 /* Load, or reg->reg move. */
14340 if (MEM_P (operands[1]))
14342 switch (GET_CODE (XEXP (operands[1], 0)))
14344 case REG:
14345 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14346 break;
14348 case LABEL_REF:
14349 case CONST:
14350 output_asm_insn ("adr%?\t%0, %1", operands);
14351 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14352 break;
14354 default:
14355 gcc_unreachable ();
14358 else
14360 rtx ops[2];
14361 int dest, src, i;
14363 gcc_assert (REG_P (operands[1]));
14365 dest = REGNO (operands[0]);
14366 src = REGNO (operands[1]);
14368 /* This seems pretty dumb, but hopefully GCC won't try to do it
14369 very often. */
14370 if (dest < src)
14371 for (i = 0; i < 4; i++)
14373 ops[0] = gen_rtx_REG (SImode, dest + i);
14374 ops[1] = gen_rtx_REG (SImode, src + i);
14375 output_asm_insn ("mov%?\t%0, %1", ops);
14377 else
14378 for (i = 3; i >= 0; i--)
14380 ops[0] = gen_rtx_REG (SImode, dest + i);
14381 ops[1] = gen_rtx_REG (SImode, src + i);
14382 output_asm_insn ("mov%?\t%0, %1", ops);
14386 else
14388 gcc_assert (MEM_P (operands[0]));
14389 gcc_assert (REG_P (operands[1]));
14390 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14392 switch (GET_CODE (XEXP (operands[0], 0)))
14394 case REG:
14395 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14396 break;
14398 default:
14399 gcc_unreachable ();
14403 return "";
14406 /* Output a VFP load or store instruction. */
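/* Typical outputs (illustrative): "flds s1, [r0]" for an SFmode load,
   "fstd d8, [sp, #8]" for a DFmode store, and fldmias/fstmdbd-style
   forms for the post-increment/pre-decrement addressing cases.  */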
14408 const char *
14409 output_move_vfp (rtx *operands)
14411 rtx reg, mem, addr, ops[2];
14412 int load = REG_P (operands[0]);
14413 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14414 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14415 const char *templ;
14416 char buff[50];
14417 enum machine_mode mode;
14419 reg = operands[!load];
14420 mem = operands[load];
14422 mode = GET_MODE (reg);
14424 gcc_assert (REG_P (reg));
14425 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14426 gcc_assert (mode == SFmode
14427 || mode == DFmode
14428 || mode == SImode
14429 || mode == DImode
14430 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14431 gcc_assert (MEM_P (mem));
14433 addr = XEXP (mem, 0);
14435 switch (GET_CODE (addr))
14437 case PRE_DEC:
14438 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14439 ops[0] = XEXP (addr, 0);
14440 ops[1] = reg;
14441 break;
14443 case POST_INC:
14444 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14445 ops[0] = XEXP (addr, 0);
14446 ops[1] = reg;
14447 break;
14449 default:
14450 templ = "f%s%c%%?\t%%%s0, %%1%s";
14451 ops[0] = reg;
14452 ops[1] = mem;
14453 break;
14456 sprintf (buff, templ,
14457 load ? "ld" : "st",
14458 dp ? 'd' : 's',
14459 dp ? "P" : "",
14460 integer_p ? "\t%@ int" : "");
14461 output_asm_insn (buff, ops);
14463 return "";
14466 /* Output a Neon quad-word load or store, or a load or store for
14467 larger structure modes.
14469 WARNING: The ordering of elements is weird in big-endian mode,
14470 because we use VSTM, as required by the EABI. GCC RTL defines
14471 element ordering based on in-memory order. This can differ
14472 from the architectural ordering of elements within a NEON register.
14473 The intrinsics defined in arm_neon.h use the NEON register element
14474 ordering, not the GCC RTL element ordering.
14476 For example, the in-memory ordering of a big-endian quadword
14477 vector with 16-bit elements when stored from register pair {d0,d1}
14478 will be (lowest address first, d0[N] is NEON register element N):
14480 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14482 When necessary, quadword registers (dN, dN+1) are moved to ARM
14483 registers from rN in the order:
14485 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14487 So that STM/LDM can be used on vectors in ARM registers, and the
14488 same memory layout will result as if VSTM/VLDM were used. */
14490 const char *
14491 output_move_neon (rtx *operands)
14493 rtx reg, mem, addr, ops[2];
14494 int regno, load = REG_P (operands[0]);
14495 const char *templ;
14496 char buff[50];
14497 enum machine_mode mode;
14499 reg = operands[!load];
14500 mem = operands[load];
14502 mode = GET_MODE (reg);
14504 gcc_assert (REG_P (reg));
14505 regno = REGNO (reg);
14506 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14507 || NEON_REGNO_OK_FOR_QUAD (regno));
14508 gcc_assert (VALID_NEON_DREG_MODE (mode)
14509 || VALID_NEON_QREG_MODE (mode)
14510 || VALID_NEON_STRUCT_MODE (mode));
14511 gcc_assert (MEM_P (mem));
14513 addr = XEXP (mem, 0);
14515 /* Strip off const from addresses like (const (plus (...))). */
14516 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14517 addr = XEXP (addr, 0);
14519 switch (GET_CODE (addr))
14521 case POST_INC:
14522 templ = "v%smia%%?\t%%0!, %%h1";
14523 ops[0] = XEXP (addr, 0);
14524 ops[1] = reg;
14525 break;
14527 case PRE_DEC:
14528 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14529 templ = "v%smdb%%?\t%%0!, %%h1";
14530 ops[0] = XEXP (addr, 0);
14531 ops[1] = reg;
14532 break;
14534 case POST_MODIFY:
14535 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14536 gcc_unreachable ();
14538 case LABEL_REF:
14539 case PLUS:
14541 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14542 int i;
14543 int overlap = -1;
14544 for (i = 0; i < nregs; i++)
14546 /* We're only using DImode here because it's a convenient size. */
14547 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14548 ops[1] = adjust_address (mem, DImode, 8 * i);
14549 if (reg_overlap_mentioned_p (ops[0], mem))
14551 gcc_assert (overlap == -1);
14552 overlap = i;
14554 else
14556 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14557 output_asm_insn (buff, ops);
14560 if (overlap != -1)
14562 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14563 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14564 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14565 output_asm_insn (buff, ops);
14568 return "";
14571 default:
14572 templ = "v%smia%%?\t%%m0, %%h1";
14573 ops[0] = mem;
14574 ops[1] = reg;
14577 sprintf (buff, templ, load ? "ld" : "st");
14578 output_asm_insn (buff, ops);
14580 return "";
14583 /* Compute and return the length of neon_mov<mode>, where <mode> is
14584 one of VSTRUCT modes: EI, OI, CI or XI. */
14586 arm_attr_length_move_neon (rtx insn)
14588 rtx reg, mem, addr;
14589 int load;
14590 enum machine_mode mode;
14592 extract_insn_cached (insn);
14594 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14596 mode = GET_MODE (recog_data.operand[0]);
14597 switch (mode)
14599 case EImode:
14600 case OImode:
14601 return 8;
14602 case CImode:
14603 return 12;
14604 case XImode:
14605 return 16;
14606 default:
14607 gcc_unreachable ();
14611 load = REG_P (recog_data.operand[0]);
14612 reg = recog_data.operand[!load];
14613 mem = recog_data.operand[load];
14615 gcc_assert (MEM_P (mem));
14617 mode = GET_MODE (reg);
14618 addr = XEXP (mem, 0);
14620 /* Strip off const from addresses like (const (plus (...))). */
14621 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14622 addr = XEXP (addr, 0);
14624 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14626 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14627 return insns * 4;
14629 else
14630 return 4;
14633 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14634 return zero. */
14637 arm_address_offset_is_imm (rtx insn)
14639 rtx mem, addr;
14641 extract_insn_cached (insn);
14643 if (REG_P (recog_data.operand[0]))
14644 return 0;
14646 mem = recog_data.operand[0];
14648 gcc_assert (MEM_P (mem));
14650 addr = XEXP (mem, 0);
14652 if (GET_CODE (addr) == REG
14653 || (GET_CODE (addr) == PLUS
14654 && GET_CODE (XEXP (addr, 0)) == REG
14655 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14656 return 1;
14657 else
14658 return 0;
14661 /* Output an ADD r, s, #n where n may be too big for one instruction.
14662 If adding zero to one register, output nothing. */
14663 const char *
14664 output_add_immediate (rtx *operands)
14666 HOST_WIDE_INT n = INTVAL (operands[2]);
14668 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14670 if (n < 0)
14671 output_multi_immediate (operands,
14672 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14673 -n);
14674 else
14675 output_multi_immediate (operands,
14676 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14680 return "";
14683 /* Output a multiple immediate operation.
14684 OPERANDS is the vector of operands referred to in the output patterns.
14685 INSTR1 is the output pattern to use for the first constant.
14686 INSTR2 is the output pattern to use for subsequent constants.
14687 IMMED_OP is the index of the constant slot in OPERANDS.
14688 N is the constant value. */
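/* A worked example (illustrative): N = 0x123456 is not a valid ARM
   immediate, so it is split into 8-bit chunks on even bit positions and
   emitted lowest chunk first, e.g.
        add     r0, r1, #0x56
        add     r0, r0, #0x3400
        add     r0, r0, #0x120000                                         */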
14689 static const char *
14690 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14691 int immed_op, HOST_WIDE_INT n)
14693 #if HOST_BITS_PER_WIDE_INT > 32
14694 n &= 0xffffffff;
14695 #endif
14697 if (n == 0)
14699 /* Quick and easy output. */
14700 operands[immed_op] = const0_rtx;
14701 output_asm_insn (instr1, operands);
14703 else
14705 int i;
14706 const char * instr = instr1;
14708 /* Note that n is never zero here (which would give no output). */
14709 for (i = 0; i < 32; i += 2)
14711 if (n & (3 << i))
14713 operands[immed_op] = GEN_INT (n & (255 << i));
14714 output_asm_insn (instr, operands);
14715 instr = instr2;
14716 i += 6;
14721 return "";
14724 /* Return the name of a shifter operation. */
14725 static const char *
14726 arm_shift_nmem(enum rtx_code code)
14728 switch (code)
14730 case ASHIFT:
14731 return ARM_LSL_NAME;
14733 case ASHIFTRT:
14734 return "asr";
14736 case LSHIFTRT:
14737 return "lsr";
14739 case ROTATERT:
14740 return "ror";
14742 default:
14743 abort();
14747 /* Return the appropriate ARM instruction for the operation code.
14748 The returned result should not be overwritten. OP is the rtx of the
14749 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14750 was shifted. */
14751 const char *
14752 arithmetic_instr (rtx op, int shift_first_arg)
14754 switch (GET_CODE (op))
14756 case PLUS:
14757 return "add";
14759 case MINUS:
14760 return shift_first_arg ? "rsb" : "sub";
14762 case IOR:
14763 return "orr";
14765 case XOR:
14766 return "eor";
14768 case AND:
14769 return "and";
14771 case ASHIFT:
14772 case ASHIFTRT:
14773 case LSHIFTRT:
14774 case ROTATERT:
14775 return arm_shift_nmem(GET_CODE(op));
14777 default:
14778 gcc_unreachable ();
14782 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14783 for the operation code. The returned result should not be overwritten.
14784 OP is the rtx code of the shift.
14785 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
14786 shift amount if the shift is by a constant. */
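/* For instance (illustrative): (ashift x 3) yields "lsl" with *AMOUNTP
   set to 3, and (mult x 8) is likewise reported as "lsl" by 3, since a
   multiplication by a power of two is treated as a shift here.  */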
14787 static const char *
14788 shift_op (rtx op, HOST_WIDE_INT *amountp)
14790 const char * mnem;
14791 enum rtx_code code = GET_CODE (op);
14793 switch (GET_CODE (XEXP (op, 1)))
14795 case REG:
14796 case SUBREG:
14797 *amountp = -1;
14798 break;
14800 case CONST_INT:
14801 *amountp = INTVAL (XEXP (op, 1));
14802 break;
14804 default:
14805 gcc_unreachable ();
14808 switch (code)
14810 case ROTATE:
14811 gcc_assert (*amountp != -1);
14812 *amountp = 32 - *amountp;
14813 code = ROTATERT;
14815 /* Fall through. */
14817 case ASHIFT:
14818 case ASHIFTRT:
14819 case LSHIFTRT:
14820 case ROTATERT:
14821 mnem = arm_shift_nmem(code);
14822 break;
14824 case MULT:
14825 /* We never have to worry about the amount being other than a
14826 power of 2, since this case can never be reloaded from a reg. */
14827 gcc_assert (*amountp != -1);
14828 *amountp = int_log2 (*amountp);
14829 return ARM_LSL_NAME;
14831 default:
14832 gcc_unreachable ();
14835 if (*amountp != -1)
14837 /* This is not 100% correct, but follows from the desire to merge
14838 multiplication by a power of 2 with the recognizer for a
14839 shift. >=32 is not a valid shift for "lsl", so we must try and
14840 output a shift that produces the correct arithmetical result.
14841 Using lsr #32 is identical except for the fact that the carry bit
14842 is not set correctly if we set the flags; but we never use the
14843 carry bit from such an operation, so we can ignore that. */
14844 if (code == ROTATERT)
14845 /* Rotate is just modulo 32. */
14846 *amountp &= 31;
14847 else if (*amountp != (*amountp & 31))
14849 if (code == ASHIFT)
14850 mnem = "lsr";
14851 *amountp = 32;
14854 /* Shifts of 0 are no-ops. */
14855 if (*amountp == 0)
14856 return NULL;
14859 return mnem;
14862 /* Obtain the shift count from POWER, which must be a power of two. */
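/* For example, int_log2 (8) returns 3. POWER is expected to be a nonzero
   power of two; the assertion below fires if no set bit is found within
   32 bits.  */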
14864 static HOST_WIDE_INT
14865 int_log2 (HOST_WIDE_INT power)
14867 HOST_WIDE_INT shift = 0;
14869 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14871 gcc_assert (shift <= 31);
14872 shift++;
14875 return shift;
14878 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14879 needed because /bin/as is horribly restrictive. The judgement about
14880 whether or not each character is 'printable' (and can be output as
14881 is) or not (and must be printed with an octal escape) must be made
14882 with reference to the *host* character set -- the situation is
14883 similar to that discussed in the comments above pp_c_char in
14884 c-pretty-print.c. */
14886 #define MAX_ASCII_LEN 51
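/* As an illustration, the bytes 'a', '"', 'b', '\n' would be emitted
   roughly as
        .ascii "a\"b\012"
   with a fresh .ascii directive started whenever the current line
   exceeds MAX_ASCII_LEN characters.  */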
14888 void
14889 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14891 int i;
14892 int len_so_far = 0;
14894 fputs ("\t.ascii\t\"", stream);
14896 for (i = 0; i < len; i++)
14898 int c = p[i];
14900 if (len_so_far >= MAX_ASCII_LEN)
14902 fputs ("\"\n\t.ascii\t\"", stream);
14903 len_so_far = 0;
14906 if (ISPRINT (c))
14908 if (c == '\\' || c == '\"')
14910 putc ('\\', stream);
14911 len_so_far++;
14913 putc (c, stream);
14914 len_so_far++;
14916 else
14918 fprintf (stream, "\\%03o", c);
14919 len_so_far += 4;
14923 fputs ("\"\n", stream);
14926 /* Compute the register save mask for registers 0 through 12
14927 inclusive. This code is used by arm_compute_save_reg_mask. */
14929 static unsigned long
14930 arm_compute_save_reg0_reg12_mask (void)
14932 unsigned long func_type = arm_current_func_type ();
14933 unsigned long save_reg_mask = 0;
14934 unsigned int reg;
14936 if (IS_INTERRUPT (func_type))
14938 unsigned int max_reg;
14939 /* Interrupt functions must not corrupt any registers,
14940 even call clobbered ones. If this is a leaf function
14941 we can just examine the registers used by the RTL, but
14942 otherwise we have to assume that whatever function is
14943 called might clobber anything, and so we have to save
14944 all the call-clobbered registers as well. */
14945 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14946 /* FIQ handlers have registers r8 - r12 banked, so
14947 we only need to check r0 - r7. Normal ISRs only
14948 bank r14 and r15, so we must check up to r12.
14949 r13 is the stack pointer which is always preserved,
14950 so we do not need to consider it here. */
14951 max_reg = 7;
14952 else
14953 max_reg = 12;
14955 for (reg = 0; reg <= max_reg; reg++)
14956 if (df_regs_ever_live_p (reg)
14957 || (! crtl->is_leaf && call_used_regs[reg]))
14958 save_reg_mask |= (1 << reg);
14960 /* Also save the pic base register if necessary. */
14961 if (flag_pic
14962 && !TARGET_SINGLE_PIC_BASE
14963 && arm_pic_register != INVALID_REGNUM
14964 && crtl->uses_pic_offset_table)
14965 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14967 else if (IS_VOLATILE(func_type))
14969 /* For noreturn functions we historically omitted register saves
14970 altogether. However this really messes up debugging. As a
14971 compromise save just the frame pointers. Combined with the link
14972 register saved elsewhere this should be sufficient to get
14973 a backtrace. */
14974 if (frame_pointer_needed)
14975 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14976 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14977 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14978 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14979 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14981 else
14983 /* In the normal case we only need to save those registers
14984 which are call saved and which are used by this function. */
14985 for (reg = 0; reg <= 11; reg++)
14986 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14987 save_reg_mask |= (1 << reg);
14989 /* Handle the frame pointer as a special case. */
14990 if (frame_pointer_needed)
14991 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14993 /* If we aren't loading the PIC register,
14994 don't stack it even though it may be live. */
14995 if (flag_pic
14996 && !TARGET_SINGLE_PIC_BASE
14997 && arm_pic_register != INVALID_REGNUM
14998 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14999 || crtl->uses_pic_offset_table))
15000 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15002 /* The prologue will copy SP into R0, so save it. */
15003 if (IS_STACKALIGN (func_type))
15004 save_reg_mask |= 1;
15007 /* Save registers so the exception handler can modify them. */
15008 if (crtl->calls_eh_return)
15010 unsigned int i;
15012 for (i = 0; ; i++)
15014 reg = EH_RETURN_DATA_REGNO (i);
15015 if (reg == INVALID_REGNUM)
15016 break;
15017 save_reg_mask |= 1 << reg;
15021 return save_reg_mask;
15025 /* Compute the number of bytes used to store the static chain register on the
15026 stack, above the stack frame. We need to know this accurately to get the
15027 alignment of the rest of the stack frame correct. */
15029 static int arm_compute_static_chain_stack_bytes (void)
15031 unsigned long func_type = arm_current_func_type ();
15032 int static_chain_stack_bytes = 0;
15034 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
15035 IS_NESTED (func_type) &&
15036 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15037 static_chain_stack_bytes = 4;
15039 return static_chain_stack_bytes;
15043 /* Compute a bit mask of which registers need to be
15044 saved on the stack for the current function.
15045 This is used by arm_get_frame_offsets, which may add extra registers. */
15047 static unsigned long
15048 arm_compute_save_reg_mask (void)
15050 unsigned int save_reg_mask = 0;
15051 unsigned long func_type = arm_current_func_type ();
15052 unsigned int reg;
15054 if (IS_NAKED (func_type))
15055 /* This should never really happen. */
15056 return 0;
15058 /* If we are creating a stack frame, then we must save the frame pointer,
15059 IP (which will hold the old stack pointer), LR and the PC. */
15060 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15061 save_reg_mask |=
15062 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15063 | (1 << IP_REGNUM)
15064 | (1 << LR_REGNUM)
15065 | (1 << PC_REGNUM);
15067 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15069 /* Decide if we need to save the link register.
15070 Interrupt routines have their own banked link register,
15071 so they never need to save it.
15072 Otherwise if we do not use the link register we do not need to save
15073 it. If we are pushing other registers onto the stack however, we
15074 can save an instruction in the epilogue by pushing the link register
15075 now and then popping it back into the PC. This incurs extra memory
15076 accesses though, so we only do it when optimizing for size, and only
15077 if we know that we will not need a fancy return sequence. */
15078 if (df_regs_ever_live_p (LR_REGNUM)
15079 || (save_reg_mask
15080 && optimize_size
15081 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15082 && !crtl->calls_eh_return))
15083 save_reg_mask |= 1 << LR_REGNUM;
15085 if (cfun->machine->lr_save_eliminated)
15086 save_reg_mask &= ~ (1 << LR_REGNUM);
15088 if (TARGET_REALLY_IWMMXT
15089 && ((bit_count (save_reg_mask)
15090 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15091 arm_compute_static_chain_stack_bytes())
15092 ) % 2) != 0)
15094 /* The total number of registers that are going to be pushed
15095 onto the stack is odd. We need to ensure that the stack
15096 is 64-bit aligned before we start to save iWMMXt registers,
15097 and also before we start to create locals. (A local variable
15098 might be a double or long long which we will load/store using
15099 an iWMMXt instruction). Therefore we need to push another
15100 ARM register, so that the stack will be 64-bit aligned. We
15101 try to avoid using the arg registers (r0 - r3) as they might be
15102 used to pass values in a tail call. */
15103 for (reg = 4; reg <= 12; reg++)
15104 if ((save_reg_mask & (1 << reg)) == 0)
15105 break;
15107 if (reg <= 12)
15108 save_reg_mask |= (1 << reg);
15109 else
15111 cfun->machine->sibcall_blocked = 1;
15112 save_reg_mask |= (1 << 3);
15116 /* We may need to push an additional register for use initializing the
15117 PIC base register. */
15118 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15119 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15121 reg = thumb_find_work_register (1 << 4);
15122 if (!call_used_regs[reg])
15123 save_reg_mask |= (1 << reg);
15126 return save_reg_mask;
15130 /* Compute a bit mask of which registers need to be
15131 saved on the stack for the current function. */
15132 static unsigned long
15133 thumb1_compute_save_reg_mask (void)
15135 unsigned long mask;
15136 unsigned reg;
15138 mask = 0;
15139 for (reg = 0; reg < 12; reg ++)
15140 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15141 mask |= 1 << reg;
15143 if (flag_pic
15144 && !TARGET_SINGLE_PIC_BASE
15145 && arm_pic_register != INVALID_REGNUM
15146 && crtl->uses_pic_offset_table)
15147 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15149 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15150 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15151 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15153 /* LR will also be pushed if any lo regs are pushed. */
15154 if (mask & 0xff || thumb_force_lr_save ())
15155 mask |= (1 << LR_REGNUM);
15157 /* Make sure we have a low work register if we need one.
15158 We will need one if we are going to push a high register,
15159 but we are not currently intending to push a low register. */
15160 if ((mask & 0xff) == 0
15161 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15163 /* Use thumb_find_work_register to choose which register
15164 we will use. If the register is live then we will
15165 have to push it. Use LAST_LO_REGNUM as our fallback
15166 choice for the register to select. */
15167 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15168 /* Make sure the register returned by thumb_find_work_register is
15169 not part of the return value. */
15170 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15171 reg = LAST_LO_REGNUM;
15173 if (! call_used_regs[reg])
15174 mask |= 1 << reg;
15177 /* The 504 below is 8 bytes less than 512 because there are two possible
15178 alignment words. We can't tell here if they will be present or not so we
15179 have to play it safe and assume that they are. */
15180 if ((CALLER_INTERWORKING_SLOT_SIZE +
15181 ROUND_UP_WORD (get_frame_size ()) +
15182 crtl->outgoing_args_size) >= 504)
15184 /* This is the same as the code in thumb1_expand_prologue() which
15185 determines which register to use for stack decrement. */
15186 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15187 if (mask & (1 << reg))
15188 break;
15190 if (reg > LAST_LO_REGNUM)
15192 /* Make sure we have a register available for stack decrement. */
15193 mask |= 1 << LAST_LO_REGNUM;
15197 return mask;
15201 /* Return the number of bytes required to save VFP registers. */
15202 static int
15203 arm_get_vfp_saved_size (void)
15205 unsigned int regno;
15206 int count;
15207 int saved;
15209 saved = 0;
15210 /* Space for saved VFP registers. */
15211 if (TARGET_HARD_FLOAT && TARGET_VFP)
15213 count = 0;
15214 for (regno = FIRST_VFP_REGNUM;
15215 regno < LAST_VFP_REGNUM;
15216 regno += 2)
15218 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15219 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15221 if (count > 0)
15223 /* Work around the ARM10 VFPr1 bug. */
15224 if (count == 2 && !arm_arch6)
15225 count++;
15226 saved += count * 8;
15228 count = 0;
15230 else
15231 count++;
15233 if (count > 0)
15235 if (count == 2 && !arm_arch6)
15236 count++;
15237 saved += count * 8;
15240 return saved;
15244 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15245 everything bar the final return instruction. If simple_return is true,
15246 then do not output epilogue, because it has already been emitted in RTL. */
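/* Some illustrative return sequences (assumed register masks): a simple
   return on ARMv5 is just "bx lr"; restoring saved registers and
   returning in one instruction gives e.g. "ldmfd sp!, {r4, r5, fp, pc}"
   (or "pop {r4, r5, fp, pc}" in unified syntax); interrupt handlers
   instead return with "subs pc, lr, #4" or append "^" to the load
   multiple so the CPSR is restored.  */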
15247 const char *
15248 output_return_instruction (rtx operand, bool really_return, bool reverse,
15249 bool simple_return)
15251 char conditional[10];
15252 char instr[100];
15253 unsigned reg;
15254 unsigned long live_regs_mask;
15255 unsigned long func_type;
15256 arm_stack_offsets *offsets;
15258 func_type = arm_current_func_type ();
15260 if (IS_NAKED (func_type))
15261 return "";
15263 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15265 /* If this function was declared non-returning, and we have
15266 found a tail call, then we have to trust that the called
15267 function won't return. */
15268 if (really_return)
15270 rtx ops[2];
15272 /* Otherwise, trap an attempted return by aborting. */
15273 ops[0] = operand;
15274 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15275 : "abort");
15276 assemble_external_libcall (ops[1]);
15277 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15280 return "";
15283 gcc_assert (!cfun->calls_alloca || really_return);
15285 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15287 cfun->machine->return_used_this_function = 1;
15289 offsets = arm_get_frame_offsets ();
15290 live_regs_mask = offsets->saved_regs_mask;
15292 if (!simple_return && live_regs_mask)
15294 const char * return_reg;
15296 /* If we do not have any special requirements for function exit
15297 (e.g. interworking) then we can load the return address
15298 directly into the PC. Otherwise we must load it into LR. */
15299 if (really_return
15300 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15301 return_reg = reg_names[PC_REGNUM];
15302 else
15303 return_reg = reg_names[LR_REGNUM];
15305 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15307 /* There are three possible reasons for the IP register
15308 being saved: 1) a stack frame was created, in which case
15309 IP contains the old stack pointer, or 2) an ISR routine
15310 corrupted it, or 3) it was saved to align the stack on
15311 iWMMXt. In case 1, restore IP into SP, otherwise just
15312 restore IP. */
15313 if (frame_pointer_needed)
15315 live_regs_mask &= ~ (1 << IP_REGNUM);
15316 live_regs_mask |= (1 << SP_REGNUM);
15318 else
15319 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15322 /* On some ARM architectures it is faster to use LDR rather than
15323 LDM to load a single register. On other architectures, the
15324 cost is the same. In 26 bit mode, or for exception handlers,
15325 we have to use LDM to load the PC so that the CPSR is also
15326 restored. */
15327 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15328 if (live_regs_mask == (1U << reg))
15329 break;
15331 if (reg <= LAST_ARM_REGNUM
15332 && (reg != LR_REGNUM
15333 || ! really_return
15334 || ! IS_INTERRUPT (func_type)))
15336 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15337 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15339 else
15341 char *p;
15342 int first = 1;
15344 /* Generate the load multiple instruction to restore the
15345 registers. Note we can get here, even if
15346 frame_pointer_needed is true, but only if sp already
15347 points to the base of the saved core registers. */
15348 if (live_regs_mask & (1 << SP_REGNUM))
15350 unsigned HOST_WIDE_INT stack_adjust;
15352 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15353 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15355 if (stack_adjust && arm_arch5 && TARGET_ARM)
15356 if (TARGET_UNIFIED_ASM)
15357 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15358 else
15359 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15360 else
15362 /* If we can't use ldmib (SA110 bug),
15363 then try to pop r3 instead. */
15364 if (stack_adjust)
15365 live_regs_mask |= 1 << 3;
15367 if (TARGET_UNIFIED_ASM)
15368 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15369 else
15370 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15373 else
15374 if (TARGET_UNIFIED_ASM)
15375 sprintf (instr, "pop%s\t{", conditional);
15376 else
15377 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15379 p = instr + strlen (instr);
15381 for (reg = 0; reg <= SP_REGNUM; reg++)
15382 if (live_regs_mask & (1 << reg))
15384 int l = strlen (reg_names[reg]);
15386 if (first)
15387 first = 0;
15388 else
15390 memcpy (p, ", ", 2);
15391 p += 2;
15394 memcpy (p, "%|", 2);
15395 memcpy (p + 2, reg_names[reg], l);
15396 p += l + 2;
15399 if (live_regs_mask & (1 << LR_REGNUM))
15401 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15402 /* If returning from an interrupt, restore the CPSR. */
15403 if (IS_INTERRUPT (func_type))
15404 strcat (p, "^");
15406 else
15407 strcpy (p, "}");
15410 output_asm_insn (instr, & operand);
15412 /* See if we need to generate an extra instruction to
15413 perform the actual function return. */
15414 if (really_return
15415 && func_type != ARM_FT_INTERWORKED
15416 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15418 /* The return has already been handled
15419 by loading the LR into the PC. */
15420 return "";
15424 if (really_return)
15426 switch ((int) ARM_FUNC_TYPE (func_type))
15428 case ARM_FT_ISR:
15429 case ARM_FT_FIQ:
15430 /* ??? This is wrong for unified assembly syntax. */
15431 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15432 break;
15434 case ARM_FT_INTERWORKED:
15435 sprintf (instr, "bx%s\t%%|lr", conditional);
15436 break;
15438 case ARM_FT_EXCEPTION:
15439 /* ??? This is wrong for unified assembly syntax. */
15440 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15441 break;
15443 default:
15444 /* Use bx if it's available. */
15445 if (arm_arch5 || arm_arch4t)
15446 sprintf (instr, "bx%s\t%%|lr", conditional);
15447 else
15448 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15449 break;
15452 output_asm_insn (instr, & operand);
15455 return "";
15458 /* Write the function name into the code section, directly preceding
15459 the function prologue.
15461 Code will be output similar to this:
15463 .ascii "arm_poke_function_name", 0
15464 .align
15466 .word 0xff000000 + (t1 - t0)
15467 arm_poke_function_name
15468 mov ip, sp
15469 stmfd sp!, {fp, ip, lr, pc}
15470 sub fp, ip, #4
15472 When performing a stack backtrace, code can inspect the value
15473 of 'pc' stored at 'fp' + 0. If the trace function then looks
15474 at location pc - 12 and the top 8 bits are set, then we know
15475 that there is a function name embedded immediately preceding this
15476 location, whose length is given by (pc[-3] & ~0xff000000).
15478 We assume that pc is declared as a pointer to an unsigned long.
15480 It is of no benefit to output the function name if we are assembling
15481 a leaf function. These function types will not contain a stack
15482 backtrace structure, therefore it is not possible to determine the
15483 function name. */
15484 void
15485 arm_poke_function_name (FILE *stream, const char *name)
15487 unsigned long alignlength;
15488 unsigned long length;
15489 rtx x;
15491 length = strlen (name) + 1;
15492 alignlength = ROUND_UP_WORD (length);
15494 ASM_OUTPUT_ASCII (stream, name, length);
15495 ASM_OUTPUT_ALIGN (stream, 2);
15496 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15497 assemble_aligned_integer (UNITS_PER_WORD, x);
15500 /* Place some comments into the assembler stream
15501 describing the current function. */
15502 static void
15503 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15505 unsigned long func_type;
15507 /* ??? Do we want to print some of the below anyway? */
15508 if (TARGET_THUMB1)
15509 return;
15511 /* Sanity check. */
15512 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15514 func_type = arm_current_func_type ();
15516 switch ((int) ARM_FUNC_TYPE (func_type))
15518 default:
15519 case ARM_FT_NORMAL:
15520 break;
15521 case ARM_FT_INTERWORKED:
15522 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15523 break;
15524 case ARM_FT_ISR:
15525 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15526 break;
15527 case ARM_FT_FIQ:
15528 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15529 break;
15530 case ARM_FT_EXCEPTION:
15531 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15532 break;
15535 if (IS_NAKED (func_type))
15536 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15538 if (IS_VOLATILE (func_type))
15539 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15541 if (IS_NESTED (func_type))
15542 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15543 if (IS_STACKALIGN (func_type))
15544 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15546 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15547 crtl->args.size,
15548 crtl->args.pretend_args_size, frame_size);
15550 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15551 frame_pointer_needed,
15552 cfun->machine->uses_anonymous_args);
15554 if (cfun->machine->lr_save_eliminated)
15555 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15557 if (crtl->calls_eh_return)
15558 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15562 static void
15563 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15564 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15566 arm_stack_offsets *offsets;
15568 if (TARGET_THUMB1)
15570 int regno;
15572 /* Emit any call-via-reg trampolines that are needed for v4t support
15573 of call_reg and call_value_reg type insns. */
15574 for (regno = 0; regno < LR_REGNUM; regno++)
15576 rtx label = cfun->machine->call_via[regno];
15578 if (label != NULL)
15580 switch_to_section (function_section (current_function_decl));
15581 targetm.asm_out.internal_label (asm_out_file, "L",
15582 CODE_LABEL_NUMBER (label));
15583 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15587 /* ??? Probably not safe to set this here, since it assumes that a
15588 function will be emitted as assembly immediately after we generate
15589 RTL for it. This does not happen for inline functions. */
15590 cfun->machine->return_used_this_function = 0;
15592 else /* TARGET_32BIT */
15594 /* We need to take into account any stack-frame rounding. */
15595 offsets = arm_get_frame_offsets ();
15597 gcc_assert (!use_return_insn (FALSE, NULL)
15598 || (cfun->machine->return_used_this_function != 0)
15599 || offsets->saved_regs == offsets->outgoing_args
15600 || frame_pointer_needed);
15602 /* Reset the ARM-specific per-function variables. */
15603 after_arm_reorg = 0;
15607 /* Generate and emit an insn that we will recognize as a push_multi.
15608 Unfortunately, since this insn does not reflect very well the actual
15609 semantics of the operation, we need to annotate the insn for the benefit
15610 of DWARF2 frame unwind information. */
15611 static rtx
15612 emit_multi_reg_push (unsigned long mask)
15614 int num_regs = 0;
15615 int num_dwarf_regs;
15616 int i, j;
15617 rtx par;
15618 rtx dwarf;
15619 int dwarf_par_index;
15620 rtx tmp, reg;
15622 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15623 if (mask & (1 << i))
15624 num_regs++;
15626 gcc_assert (num_regs && num_regs <= 16);
15628 /* We don't record the PC in the dwarf frame information. */
15629 num_dwarf_regs = num_regs;
15630 if (mask & (1 << PC_REGNUM))
15631 num_dwarf_regs--;
15633 /* For the body of the insn we are going to generate an UNSPEC in
15634 parallel with several USEs. This allows the insn to be recognized
15635 by the push_multi pattern in the arm.md file.
15637 The body of the insn looks something like this:
15639 (parallel [
15640 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15641 (const_int:SI <num>)))
15642 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15643 (use (reg:SI XX))
15644 (use (reg:SI YY))
15648 For the frame note however, we try to be more explicit and actually
15649 show each register being stored into the stack frame, plus a (single)
15650 decrement of the stack pointer. We do it this way in order to be
15651 friendly to the stack unwinding code, which only wants to see a single
15652 stack decrement per instruction. The RTL we generate for the note looks
15653 something like this:
15655 (sequence [
15656 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15657 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15658 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15659 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15663 FIXME: In an ideal world the PRE_MODIFY would not exist and
15664 instead we'd have a parallel expression detailing all
15665 the stores to the various memory addresses so that debug
15666 information is more up-to-date. Remember however while writing
15667 this to take care of the constraints with the push instruction.
15669 Note also that this has to be taken care of for the VFP registers.
15671 For more see PR43399. */
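/* As a concrete illustration, a MASK of (1 << 4) | (1 << 5) | (1 << LR_REGNUM)
   is matched by the push_multi pattern and assembles to something like
   "push {r4, r5, lr}" (stmfd sp!, {r4, r5, lr} in traditional syntax), while
   the attached frame note describes the three stores and a single 12-byte
   decrement of SP.  */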
15673 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15674 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15675 dwarf_par_index = 1;
15677 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15679 if (mask & (1 << i))
15681 reg = gen_rtx_REG (SImode, i);
15683 XVECEXP (par, 0, 0)
15684 = gen_rtx_SET (VOIDmode,
15685 gen_frame_mem
15686 (BLKmode,
15687 gen_rtx_PRE_MODIFY (Pmode,
15688 stack_pointer_rtx,
15689 plus_constant
15690 (Pmode, stack_pointer_rtx,
15691 -4 * num_regs))
15693 gen_rtx_UNSPEC (BLKmode,
15694 gen_rtvec (1, reg),
15695 UNSPEC_PUSH_MULT));
15697 if (i != PC_REGNUM)
15699 tmp = gen_rtx_SET (VOIDmode,
15700 gen_frame_mem (SImode, stack_pointer_rtx),
15701 reg);
15702 RTX_FRAME_RELATED_P (tmp) = 1;
15703 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15704 dwarf_par_index++;
15707 break;
15711 for (j = 1, i++; j < num_regs; i++)
15713 if (mask & (1 << i))
15715 reg = gen_rtx_REG (SImode, i);
15717 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15719 if (i != PC_REGNUM)
15721 tmp
15722 = gen_rtx_SET (VOIDmode,
15723 gen_frame_mem
15724 (SImode,
15725 plus_constant (Pmode, stack_pointer_rtx,
15726 4 * j)),
15727 reg);
15728 RTX_FRAME_RELATED_P (tmp) = 1;
15729 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15732 j++;
15736 par = emit_insn (par);
15738 tmp = gen_rtx_SET (VOIDmode,
15739 stack_pointer_rtx,
15740 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15741 RTX_FRAME_RELATED_P (tmp) = 1;
15742 XVECEXP (dwarf, 0, 0) = tmp;
15744 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15746 return par;
15749 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15750 SAVED_REGS_MASK shows which registers need to be restored.
15752 Unfortunately, since this insn does not reflect very well the actual
15753 semantics of the operation, we need to annotate the insn for the benefit
15754 of DWARF2 frame unwind information. */
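/* For illustration, a SAVED_REGS_MASK of (1 << 4) | (1 << 5) | (1 << PC_REGNUM)
   becomes something like "pop {r4, r5, pc}", i.e. a load-multiple that also
   performs the function return; that is why such a pop is emitted as a jump
   insn below.  */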
15755 static void
15756 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15758 int num_regs = 0;
15759 int i, j;
15760 rtx par;
15761 rtx dwarf = NULL_RTX;
15762 rtx tmp, reg;
15763 bool return_in_pc;
15764 int offset_adj;
15765 int emit_update;
15767 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
15768 offset_adj = return_in_pc ? 1 : 0;
15769 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15770 if (saved_regs_mask & (1 << i))
15771 num_regs++;
15773 gcc_assert (num_regs && num_regs <= 16);
15775 /* If SP is in the register list, then we don't emit the SP update insn. */
15776 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15778 /* The parallel needs to hold num_regs SETs
15779 and one SET for the stack update. */
15780 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15782 if (return_in_pc)
15784 tmp = ret_rtx;
15785 XVECEXP (par, 0, 0) = tmp;
15788 if (emit_update)
15790 /* Increment the stack pointer, based on there being
15791 num_regs 4-byte registers to restore. */
15792 tmp = gen_rtx_SET (VOIDmode,
15793 stack_pointer_rtx,
15794 plus_constant (Pmode,
15795 stack_pointer_rtx,
15796 4 * num_regs));
15797 RTX_FRAME_RELATED_P (tmp) = 1;
15798 XVECEXP (par, 0, offset_adj) = tmp;
15801 /* Now restore every reg, which may include PC. */
15802 for (j = 0, i = 0; j < num_regs; i++)
15803 if (saved_regs_mask & (1 << i))
15805 reg = gen_rtx_REG (SImode, i);
15806 tmp = gen_rtx_SET (VOIDmode,
15807 reg,
15808 gen_frame_mem
15809 (SImode,
15810 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15811 RTX_FRAME_RELATED_P (tmp) = 1;
15812 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15814 /* We need to maintain a sequence for the DWARF info too. As the DWARF
15815 info should not include the PC, skip the PC here. */
15816 if (i != PC_REGNUM)
15817 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15819 j++;
15822 if (return_in_pc)
15823 par = emit_jump_insn (par);
15824 else
15825 par = emit_insn (par);
15827 REG_NOTES (par) = dwarf;
15830 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15831 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15833 Unfortunately, since this insn does not reflect very well the actual
15834 semantics of the operation, we need to annotate the insn for the benefit
15835 of DWARF2 frame unwind information. */
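/* For illustration, restoring four D-registers starting at d8 with BASE_REG
   being the stack pointer becomes something like "vldm sp!, {d8-d11}",
   together with a 32-byte adjustment of the base register.  */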
15836 static void
15837 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
15839 int i, j;
15840 rtx par;
15841 rtx dwarf = NULL_RTX;
15842 rtx tmp, reg;
15844 gcc_assert (num_regs && num_regs <= 32);
15846 /* Workaround ARM10 VFPr1 bug. */
15847 if (num_regs == 2 && !arm_arch6)
15849 if (first_reg == 15)
15850 first_reg--;
15852 num_regs++;
15855 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
15856 there could be up to 32 D-registers to restore.
15857 If there are more than 16 D-registers, make two recursive calls,
15858 each of which emits one pop_multi instruction. */
15859 if (num_regs > 16)
15861 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
15862 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
15863 return;
15866 /* The parallel needs to hold num_regs SETs
15867 and one SET for the stack update. */
15868 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
15870 /* Increment the stack pointer, based on there being
15871 num_regs 8-byte registers to restore. */
15872 tmp = gen_rtx_SET (VOIDmode,
15873 base_reg,
15874 plus_constant (Pmode, base_reg, 8 * num_regs));
15875 RTX_FRAME_RELATED_P (tmp) = 1;
15876 XVECEXP (par, 0, 0) = tmp;
15878 /* Now show every reg that will be restored, using a SET for each. */
15879 for (j = 0, i=first_reg; j < num_regs; i += 2)
15881 reg = gen_rtx_REG (DFmode, i);
15883 tmp = gen_rtx_SET (VOIDmode,
15884 reg,
15885 gen_frame_mem
15886 (DFmode,
15887 plus_constant (Pmode, base_reg, 8 * j)));
15888 RTX_FRAME_RELATED_P (tmp) = 1;
15889 XVECEXP (par, 0, j + 1) = tmp;
15891 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15893 j++;
15896 par = emit_insn (par);
15897 REG_NOTES (par) = dwarf;
15900 /* Calculate the size of the return value that is passed in registers. */
15901 static unsigned
15902 arm_size_return_regs (void)
15904 enum machine_mode mode;
15906 if (crtl->return_rtx != 0)
15907 mode = GET_MODE (crtl->return_rtx);
15908 else
15909 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15911 return GET_MODE_SIZE (mode);
15914 /* Return true if the current function needs to save/restore LR. */
15915 static bool
15916 thumb_force_lr_save (void)
15918 return !cfun->machine->lr_save_eliminated
15919 && (!leaf_function_p ()
15920 || thumb_far_jump_used_p ()
15921 || df_regs_ever_live_p (LR_REGNUM));
15925 /* Return true if r3 is used by any of the tail call insns in the
15926 current function. */
15927 static bool
15928 any_sibcall_uses_r3 (void)
15930 edge_iterator ei;
15931 edge e;
15933 if (!crtl->tail_call_emit)
15934 return false;
15935 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15936 if (e->flags & EDGE_SIBCALL)
15938 rtx call = BB_END (e->src);
15939 if (!CALL_P (call))
15940 call = prev_nonnote_nondebug_insn (call);
15941 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15942 if (find_regno_fusage (call, USE, 3))
15943 return true;
15945 return false;
15949 /* Compute the distance from register FROM to register TO.
15950 These can be the arg pointer (26), the soft frame pointer (25),
15951 the stack pointer (13) or the hard frame pointer (11).
15952 In Thumb mode r7 is used as the hard frame pointer, if needed.
15953 Typical stack layout looks like this:
15955 old stack pointer -> | |
15956 ----
15957 | | \
15958 | | saved arguments for
15959 | | vararg functions
15960 | | /
15962 hard FP & arg pointer -> | | \
15963 | | stack
15964 | | frame
15965 | | /
15967 | | \
15968 | | call saved
15969 | | registers
15970 soft frame pointer -> | | /
15972 | | \
15973 | | local
15974 | | variables
15975 locals base pointer -> | | /
15977 | | \
15978 | | outgoing
15979 | | arguments
15980 current stack pointer -> | | /
15983 For a given function some or all of these stack components
15984 may not be needed, giving rise to the possibility of
15985 eliminating some of the registers.
15987 The values returned by this function must reflect the behavior
15988 of arm_expand_prologue() and arm_compute_save_reg_mask().
15990 The sign of the number returned reflects the direction of stack
15991 growth, so the values are positive for all eliminations except
15992 from the soft frame pointer to the hard frame pointer.
15994 SFP may point just inside the local variables block to ensure correct
15995 alignment. */
15998 /* Calculate stack offsets. These are used to calculate register elimination
15999 offsets and in prologue/epilogue code. Also calculates which registers
16000 should be saved. */
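/* As a rough worked example (assuming a 32-bit function with no pretend
   arguments, no static chain slot and no caller-interworking slot): saving
   {r4, lr} with 16 bytes of locals and no outgoing arguments gives
   saved_args = 0, saved_regs = 8, soft_frame = 8, locals_base = 24 and
   outgoing_args = 24, keeping SP doubleword aligned throughout.  */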
16002 static arm_stack_offsets *
16003 arm_get_frame_offsets (void)
16005 struct arm_stack_offsets *offsets;
16006 unsigned long func_type;
16007 int leaf;
16008 int saved;
16009 int core_saved;
16010 HOST_WIDE_INT frame_size;
16011 int i;
16013 offsets = &cfun->machine->stack_offsets;
16015 /* We need to know if we are a leaf function. Unfortunately, it
16016 is possible to be called after start_sequence has been called,
16017 which causes get_insns to return the insns for the sequence,
16018 not the function, which will cause leaf_function_p to return
16019 the incorrect result.
16021 We only need to know about leaf functions once reload has completed, and the
16022 frame size cannot be changed after that time, so we can safely
16023 use the cached value. */
16025 if (reload_completed)
16026 return offsets;
16028 /* Initially this is the size of the local variables. It will be translated
16029 into an offset once we have determined the size of preceding data. */
16030 frame_size = ROUND_UP_WORD (get_frame_size ());
16032 leaf = leaf_function_p ();
16034 /* Space for variadic functions. */
16035 offsets->saved_args = crtl->args.pretend_args_size;
16037 /* In Thumb mode this is incorrect, but never used. */
16038 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16039 arm_compute_static_chain_stack_bytes();
16041 if (TARGET_32BIT)
16043 unsigned int regno;
16045 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16046 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16047 saved = core_saved;
16049 /* We know that SP will be doubleword aligned on entry, and we must
16050 preserve that condition at any subroutine call. We also require the
16051 soft frame pointer to be doubleword aligned. */
16053 if (TARGET_REALLY_IWMMXT)
16055 /* Check for the call-saved iWMMXt registers. */
16056 for (regno = FIRST_IWMMXT_REGNUM;
16057 regno <= LAST_IWMMXT_REGNUM;
16058 regno++)
16059 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16060 saved += 8;
16063 func_type = arm_current_func_type ();
16064 /* Space for saved VFP registers. */
16065 if (! IS_VOLATILE (func_type)
16066 && TARGET_HARD_FLOAT && TARGET_VFP)
16067 saved += arm_get_vfp_saved_size ();
16069 else /* TARGET_THUMB1 */
16071 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16072 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16073 saved = core_saved;
16074 if (TARGET_BACKTRACE)
16075 saved += 16;
16078 /* Saved registers include the stack frame. */
16079 offsets->saved_regs = offsets->saved_args + saved +
16080 arm_compute_static_chain_stack_bytes();
16081 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16082 /* A leaf function does not need any stack alignment if it has nothing
16083 on the stack. */
16084 if (leaf && frame_size == 0
16085 /* However if it calls alloca(), we have a dynamically allocated
16086 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16087 && ! cfun->calls_alloca)
16089 offsets->outgoing_args = offsets->soft_frame;
16090 offsets->locals_base = offsets->soft_frame;
16091 return offsets;
16094 /* Ensure SFP has the correct alignment. */
16095 if (ARM_DOUBLEWORD_ALIGN
16096 && (offsets->soft_frame & 7))
16098 offsets->soft_frame += 4;
16099 /* Try to align stack by pushing an extra reg. Don't bother doing this
16100 when there is a stack frame as the alignment will be rolled into
16101 the normal stack adjustment. */
16102 if (frame_size + crtl->outgoing_args_size == 0)
16104 int reg = -1;
16106 /* If it is safe to use r3, then do so. This sometimes
16107 generates better code on Thumb-2 by avoiding the need to
16108 use 32-bit push/pop instructions. */
16109 if (! any_sibcall_uses_r3 ()
16110 && arm_size_return_regs () <= 12
16111 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16113 reg = 3;
16115 else
16116 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16118 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16120 reg = i;
16121 break;
16125 if (reg != -1)
16127 offsets->saved_regs += 4;
16128 offsets->saved_regs_mask |= (1 << reg);
16133 offsets->locals_base = offsets->soft_frame + frame_size;
16134 offsets->outgoing_args = (offsets->locals_base
16135 + crtl->outgoing_args_size);
16137 if (ARM_DOUBLEWORD_ALIGN)
16139 /* Ensure SP remains doubleword aligned. */
16140 if (offsets->outgoing_args & 7)
16141 offsets->outgoing_args += 4;
16142 gcc_assert (!(offsets->outgoing_args & 7));
16145 return offsets;
16149 /* Calculate the relative offsets for the different stack pointers. Positive
16150 offsets are in the direction of stack growth. */
16152 HOST_WIDE_INT
16153 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16155 arm_stack_offsets *offsets;
16157 offsets = arm_get_frame_offsets ();
16159 /* OK, now we have enough information to compute the distances.
16160 There must be an entry in these switch tables for each pair
16161 of registers in ELIMINABLE_REGS, even if some of the entries
16162 seem to be redundant or useless. */
16163 switch (from)
16165 case ARG_POINTER_REGNUM:
16166 switch (to)
16168 case THUMB_HARD_FRAME_POINTER_REGNUM:
16169 return 0;
16171 case FRAME_POINTER_REGNUM:
16172 /* This is the reverse of the soft frame pointer
16173 to hard frame pointer elimination below. */
16174 return offsets->soft_frame - offsets->saved_args;
16176 case ARM_HARD_FRAME_POINTER_REGNUM:
16177 /* This is only non-zero in the case where the static chain register
16178 is stored above the frame. */
16179 return offsets->frame - offsets->saved_args - 4;
16181 case STACK_POINTER_REGNUM:
16182 /* If nothing has been pushed on the stack at all
16183 then this will return -4. This *is* correct! */
16184 return offsets->outgoing_args - (offsets->saved_args + 4);
16186 default:
16187 gcc_unreachable ();
16189 gcc_unreachable ();
16191 case FRAME_POINTER_REGNUM:
16192 switch (to)
16194 case THUMB_HARD_FRAME_POINTER_REGNUM:
16195 return 0;
16197 case ARM_HARD_FRAME_POINTER_REGNUM:
16198 /* The hard frame pointer points to the top entry in the
16199 stack frame. The soft frame pointer points to the bottom entry
16200 in the stack frame. If there is no stack frame at all,
16201 then they are identical. */
16203 return offsets->frame - offsets->soft_frame;
16205 case STACK_POINTER_REGNUM:
16206 return offsets->outgoing_args - offsets->soft_frame;
16208 default:
16209 gcc_unreachable ();
16211 gcc_unreachable ();
16213 default:
16214 /* You cannot eliminate from the stack pointer.
16215 In theory you could eliminate from the hard frame
16216 pointer to the stack pointer, but this will never
16217 happen, since if a stack frame is not needed the
16218 hard frame pointer will never be used. */
16219 gcc_unreachable ();
16223 /* Given FROM and TO register numbers, say whether this elimination is
16224 allowed. Frame pointer elimination is automatically handled.
16226 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16227 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16228 pointer, we must eliminate FRAME_POINTER_REGNUM into
16229 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16230 ARG_POINTER_REGNUM. */
16232 bool
16233 arm_can_eliminate (const int from, const int to)
16235 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16236 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16237 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16238 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16239 true);
16242 /* Emit RTL to save coprocessor registers on function entry. Returns the
16243 number of bytes pushed. */
16245 static int
16246 arm_save_coproc_regs(void)
16248 int saved_size = 0;
16249 unsigned reg;
16250 unsigned start_reg;
16251 rtx insn;
16253 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16254 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16256 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16257 insn = gen_rtx_MEM (V2SImode, insn);
16258 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16259 RTX_FRAME_RELATED_P (insn) = 1;
16260 saved_size += 8;
16263 if (TARGET_HARD_FLOAT && TARGET_VFP)
16265 start_reg = FIRST_VFP_REGNUM;
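/* Scan for runs of consecutive call-saved VFP registers that are live and
   store each run with a single multi-register store via vfp_emit_fstmd,
   e.g. one FSTMD/VSTM covering d8-d10 rather than three separate stores.  */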
16267 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16269 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16270 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16272 if (start_reg != reg)
16273 saved_size += vfp_emit_fstmd (start_reg,
16274 (reg - start_reg) / 2);
16275 start_reg = reg + 2;
16278 if (start_reg != reg)
16279 saved_size += vfp_emit_fstmd (start_reg,
16280 (reg - start_reg) / 2);
16282 return saved_size;
16286 /* Set the Thumb frame pointer from the stack pointer. */
16288 static void
16289 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16291 HOST_WIDE_INT amount;
16292 rtx insn, dwarf;
16294 amount = offsets->outgoing_args - offsets->locals_base;
16295 if (amount < 1024)
16296 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16297 stack_pointer_rtx, GEN_INT (amount)));
16298 else
16300 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16301 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16302 expects the first two operands to be the same. */
16303 if (TARGET_THUMB2)
16305 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16306 stack_pointer_rtx,
16307 hard_frame_pointer_rtx));
16309 else
16311 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16312 hard_frame_pointer_rtx,
16313 stack_pointer_rtx));
16315 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16316 plus_constant (Pmode, stack_pointer_rtx, amount));
16317 RTX_FRAME_RELATED_P (dwarf) = 1;
16318 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16321 RTX_FRAME_RELATED_P (insn) = 1;
16324 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16325 function. */
16326 void
16327 arm_expand_prologue (void)
16329 rtx amount;
16330 rtx insn;
16331 rtx ip_rtx;
16332 unsigned long live_regs_mask;
16333 unsigned long func_type;
16334 int fp_offset = 0;
16335 int saved_pretend_args = 0;
16336 int saved_regs = 0;
16337 unsigned HOST_WIDE_INT args_to_push;
16338 arm_stack_offsets *offsets;
16340 func_type = arm_current_func_type ();
16342 /* Naked functions don't have prologues. */
16343 if (IS_NAKED (func_type))
16344 return;
16346 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
16347 args_to_push = crtl->args.pretend_args_size;
16349 /* Compute which registers we will have to save onto the stack. */
16350 offsets = arm_get_frame_offsets ();
16351 live_regs_mask = offsets->saved_regs_mask;
16353 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16355 if (IS_STACKALIGN (func_type))
16357 rtx r0, r1;
16359 /* Handle a word-aligned stack pointer. We generate the following:
16361 mov r0, sp
16362 bic r1, r0, #7
16363 mov sp, r1
16364 <save and restore r0 in normal prologue/epilogue>
16365 mov sp, r0
16366 bx lr
16368 The unwinder doesn't need to know about the stack realignment.
16369 Just tell it we saved SP in r0. */
16370 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16372 r0 = gen_rtx_REG (SImode, 0);
16373 r1 = gen_rtx_REG (SImode, 1);
16375 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16376 RTX_FRAME_RELATED_P (insn) = 1;
16377 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16379 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16381 /* ??? The CFA changes here, which may cause GDB to conclude that it
16382 has entered a different function. That said, the unwind info is
16383 correct, individually, before and after this instruction because
16384 we've described the save of SP, which will override the default
16385 handling of SP as restoring from the CFA. */
16386 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16389 /* For APCS frames, if the IP register is clobbered
16390 when creating the frame, save that register in a
16391 special way. */
16392 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16394 if (IS_INTERRUPT (func_type))
16396 /* Interrupt functions must not corrupt any registers.
16397 Creating a frame pointer however, corrupts the IP
16398 register, so we must push it first. */
16399 emit_multi_reg_push (1 << IP_REGNUM);
16401 /* Do not set RTX_FRAME_RELATED_P on this insn.
16402 The dwarf stack unwinding code only wants to see one
16403 stack decrement per function, and this is not it. If
16404 this instruction is labeled as being part of the frame
16405 creation sequence then dwarf2out_frame_debug_expr will
16406 die when it encounters the assignment of IP to FP
16407 later on, since the use of SP here establishes SP as
16408 the CFA register and not IP.
16410 Anyway this instruction is not really part of the stack
16411 frame creation although it is part of the prologue. */
16413 else if (IS_NESTED (func_type))
16415 /* The Static chain register is the same as the IP register
16416 used as a scratch register during stack frame creation.
16417 To get around this we need to find somewhere to store IP
16418 whilst the frame is being created. We try the following
16419 places in order:
16421 1. The last argument register.
16422 2. A slot on the stack above the frame. (This only
16423 works if the function is not a varargs function).
16424 3. Register r3, after pushing the argument registers
16425 onto the stack.
16427 Note - we only need to tell the dwarf2 backend about the SP
16428 adjustment in the second variant; the static chain register
16429 doesn't need to be unwound, as it doesn't contain a value
16430 inherited from the caller. */
16432 if (df_regs_ever_live_p (3) == false)
16433 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16434 else if (args_to_push == 0)
16436 rtx dwarf;
16438 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16439 saved_regs += 4;
16441 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16442 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16443 fp_offset = 4;
16445 /* Just tell the dwarf backend that we adjusted SP. */
16446 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16447 plus_constant (Pmode, stack_pointer_rtx,
16448 -fp_offset));
16449 RTX_FRAME_RELATED_P (insn) = 1;
16450 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16452 else
16454 /* Store the args on the stack. */
16455 if (cfun->machine->uses_anonymous_args)
16456 insn = emit_multi_reg_push
16457 ((0xf0 >> (args_to_push / 4)) & 0xf);
16458 else
16459 insn = emit_insn
16460 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16461 GEN_INT (- args_to_push)));
16463 RTX_FRAME_RELATED_P (insn) = 1;
16465 saved_pretend_args = 1;
16466 fp_offset = args_to_push;
16467 args_to_push = 0;
16469 /* Now reuse r3 to preserve IP. */
16470 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16474 insn = emit_set_insn (ip_rtx,
16475 plus_constant (Pmode, stack_pointer_rtx,
16476 fp_offset));
16477 RTX_FRAME_RELATED_P (insn) = 1;
16480 if (args_to_push)
16482 /* Push the argument registers, or reserve space for them. */
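/* args_to_push is a multiple of 4 here, so e.g. 8 bytes of anonymous
   arguments gives a mask of (0xf0 >> 2) & 0xf == 0xc, i.e. a push of
   {r2, r3}, the last two argument registers.  */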
16483 if (cfun->machine->uses_anonymous_args)
16484 insn = emit_multi_reg_push
16485 ((0xf0 >> (args_to_push / 4)) & 0xf);
16486 else
16487 insn = emit_insn
16488 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16489 GEN_INT (- args_to_push)));
16490 RTX_FRAME_RELATED_P (insn) = 1;
16493 /* If this is an interrupt service routine, and the link register
16494 is going to be pushed, and we're not generating an extra
16495 push of IP (needed when a frame is needed and the frame layout is APCS),
16496 then subtracting four from LR now will mean that the function return
16497 can be done with a single instruction. */
16498 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16499 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16500 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16501 && TARGET_ARM)
16503 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16505 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16508 if (live_regs_mask)
16510 saved_regs += bit_count (live_regs_mask) * 4;
16511 if (optimize_size && !frame_pointer_needed
16512 && saved_regs == offsets->saved_regs - offsets->saved_args)
16514 /* If no coprocessor registers are being pushed and we don't have
16515 to worry about a frame pointer then push extra registers to
16516 create the stack frame. This is done in a way that does not
16517 alter the frame layout, so is independent of the epilogue. */
16518 int n;
16519 int frame;
16520 n = 0;
16521 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16522 n++;
16523 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16524 if (frame && n * 4 >= frame)
16526 n = frame / 4;
16527 live_regs_mask |= (1 << n) - 1;
16528 saved_regs += frame;
16531 insn = emit_multi_reg_push (live_regs_mask);
16532 RTX_FRAME_RELATED_P (insn) = 1;
16535 if (! IS_VOLATILE (func_type))
16536 saved_regs += arm_save_coproc_regs ();
16538 if (frame_pointer_needed && TARGET_ARM)
16540 /* Create the new frame pointer. */
16541 if (TARGET_APCS_FRAME)
16543 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16544 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16545 RTX_FRAME_RELATED_P (insn) = 1;
16547 if (IS_NESTED (func_type))
16549 /* Recover the static chain register. */
16550 if (!df_regs_ever_live_p (3)
16551 || saved_pretend_args)
16552 insn = gen_rtx_REG (SImode, 3);
16553 else /* if (crtl->args.pretend_args_size == 0) */
16555 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16556 insn = gen_frame_mem (SImode, insn);
16558 emit_set_insn (ip_rtx, insn);
16559 /* Add a USE to stop propagate_one_insn() from barfing. */
16560 emit_insn (gen_prologue_use (ip_rtx));
16563 else
16565 insn = GEN_INT (saved_regs - 4);
16566 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16567 stack_pointer_rtx, insn));
16568 RTX_FRAME_RELATED_P (insn) = 1;
16572 if (flag_stack_usage_info)
16573 current_function_static_stack_size
16574 = offsets->outgoing_args - offsets->saved_args;
16576 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16578 /* This add can produce multiple insns for a large constant, so we
16579 need to get tricky. */
16580 rtx last = get_last_insn ();
16582 amount = GEN_INT (offsets->saved_args + saved_regs
16583 - offsets->outgoing_args);
16585 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16586 amount));
16587 do
16589 last = last ? NEXT_INSN (last) : get_insns ();
16590 RTX_FRAME_RELATED_P (last) = 1;
16592 while (last != insn);
16594 /* If the frame pointer is needed, emit a special barrier that
16595 will prevent the scheduler from moving stores to the frame
16596 before the stack adjustment. */
16597 if (frame_pointer_needed)
16598 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16599 hard_frame_pointer_rtx));
16603 if (frame_pointer_needed && TARGET_THUMB2)
16604 thumb_set_frame_pointer (offsets);
16606 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16608 unsigned long mask;
16610 mask = live_regs_mask;
16611 mask &= THUMB2_WORK_REGS;
16612 if (!IS_NESTED (func_type))
16613 mask |= (1 << IP_REGNUM);
16614 arm_load_pic_register (mask);
16617 /* If we are profiling, make sure no instructions are scheduled before
16618 the call to mcount. Similarly if the user has requested no
16619 scheduling in the prolog. Similarly if we want non-call exceptions
16620 using the EABI unwinder, to prevent faulting instructions from being
16621 swapped with a stack adjustment. */
16622 if (crtl->profile || !TARGET_SCHED_PROLOG
16623 || (arm_except_unwind_info (&global_options) == UI_TARGET
16624 && cfun->can_throw_non_call_exceptions))
16625 emit_insn (gen_blockage ());
16627 /* If the link register is being kept alive, with the return address in it,
16628 then make sure that it does not get reused by the ce2 pass. */
16629 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16630 cfun->machine->lr_save_eliminated = 1;
16633 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16634 static void
16635 arm_print_condition (FILE *stream)
16637 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16639 /* Branch conversion is not implemented for Thumb-2. */
16640 if (TARGET_THUMB)
16642 output_operand_lossage ("predicated Thumb instruction");
16643 return;
16645 if (current_insn_predicate != NULL)
16647 output_operand_lossage
16648 ("predicated instruction in conditional sequence");
16649 return;
16652 fputs (arm_condition_codes[arm_current_cc], stream);
16654 else if (current_insn_predicate)
16656 enum arm_cond_code code;
16658 if (TARGET_THUMB1)
16660 output_operand_lossage ("predicated Thumb instruction");
16661 return;
16664 code = get_arm_condition_code (current_insn_predicate);
16665 fputs (arm_condition_codes[code], stream);
16670 /* If CODE is 'd', then X is a condition operand and the instruction
16671 should only be executed if the condition is true.
16672 If CODE is 'D', then X is a condition operand and the instruction
16673 should only be executed if the condition is false: however, if the mode
16674 of the comparison is CCFPEmode, then always execute the instruction -- we
16675 do this because in these circumstances !GE does not necessarily imply LT;
16676 in these cases the instruction pattern will take care to make sure that
16677 an instruction containing %d will follow, thereby undoing the effects of
16678 doing this instruction unconditionally.
16679 If CODE is 'N' then X is a floating point operand that must be negated
16680 before output.
16681 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16682 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16683 static void
16684 arm_print_operand (FILE *stream, rtx x, int code)
16686 switch (code)
16688 case '@':
16689 fputs (ASM_COMMENT_START, stream);
16690 return;
16692 case '_':
16693 fputs (user_label_prefix, stream);
16694 return;
16696 case '|':
16697 fputs (REGISTER_PREFIX, stream);
16698 return;
16700 case '?':
16701 arm_print_condition (stream);
16702 return;
16704 case '(':
16705 /* Nothing in unified syntax, otherwise the current condition code. */
16706 if (!TARGET_UNIFIED_ASM)
16707 arm_print_condition (stream);
16708 break;
16710 case ')':
16711 /* The current condition code in unified syntax, otherwise nothing. */
16712 if (TARGET_UNIFIED_ASM)
16713 arm_print_condition (stream);
16714 break;
16716 case '.':
16717 /* The current condition code for a condition code setting instruction.
16718 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16719 if (TARGET_UNIFIED_ASM)
16721 fputc('s', stream);
16722 arm_print_condition (stream);
16724 else
16726 arm_print_condition (stream);
16727 fputc('s', stream);
16729 return;
16731 case '!':
16732 /* If the instruction is conditionally executed then print
16733 the current condition code, otherwise print 's'. */
16734 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16735 if (current_insn_predicate)
16736 arm_print_condition (stream);
16737 else
16738 fputc('s', stream);
16739 break;
16741 /* %# is a "break" sequence. It doesn't output anything, but is used to
16742 separate e.g. operand numbers from following text, if that text consists
16743 of further digits which we don't want to be part of the operand
16744 number. */
16745 case '#':
16746 return;
16748 case 'N':
16750 REAL_VALUE_TYPE r;
16751 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16752 r = real_value_negate (&r);
16753 fprintf (stream, "%s", fp_const_from_val (&r));
16755 return;
16757 /* An integer or symbol address without a preceding # sign. */
16758 case 'c':
16759 switch (GET_CODE (x))
16761 case CONST_INT:
16762 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16763 break;
16765 case SYMBOL_REF:
16766 output_addr_const (stream, x);
16767 break;
16769 case CONST:
16770 if (GET_CODE (XEXP (x, 0)) == PLUS
16771 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16773 output_addr_const (stream, x);
16774 break;
16776 /* Fall through. */
16778 default:
16779 output_operand_lossage ("Unsupported operand for code '%c'", code);
16781 return;
16783 /* An integer that we want to print in HEX. */
16784 case 'x':
16785 switch (GET_CODE (x))
16787 case CONST_INT:
16788 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16789 break;
16791 default:
16792 output_operand_lossage ("Unsupported operand for code '%c'", code);
16794 return;
16796 case 'B':
16797 if (GET_CODE (x) == CONST_INT)
16799 HOST_WIDE_INT val;
16800 val = ARM_SIGN_EXTEND (~INTVAL (x));
16801 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16803 else
16805 putc ('~', stream);
16806 output_addr_const (stream, x);
16808 return;
16810 case 'L':
16811 /* The low 16 bits of an immediate constant. */
16812 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16813 return;
16815 case 'i':
16816 fprintf (stream, "%s", arithmetic_instr (x, 1));
16817 return;
16819 case 'I':
16820 fprintf (stream, "%s", arithmetic_instr (x, 0));
16821 return;
16823 case 'S':
16825 HOST_WIDE_INT val;
16826 const char *shift;
16828 if (!shift_operator (x, SImode))
16830 output_operand_lossage ("invalid shift operand");
16831 break;
16834 shift = shift_op (x, &val);
16836 if (shift)
16838 fprintf (stream, ", %s ", shift);
16839 if (val == -1)
16840 arm_print_operand (stream, XEXP (x, 1), 0);
16841 else
16842 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16845 return;
16847 /* An explanation of the 'Q', 'R' and 'H' register operands:
16849 In a pair of registers containing a DI or DF value the 'Q'
16850 operand returns the register number of the register containing
16851 the least significant part of the value. The 'R' operand returns
16852 the register number of the register containing the most
16853 significant part of the value.
16855 The 'H' operand returns the higher of the two register numbers.
16856 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
16857 same as the 'Q' operand, since the most significant part of the
16858 value is held in the lower number register. The reverse is true
16859 on systems where WORDS_BIG_ENDIAN is false.
16861 The purpose of these operands is to distinguish between cases
16862 where the endian-ness of the values is important (for example
16863 when they are added together), and cases where the endian-ness
16864 is irrelevant, but the order of register operations is important.
16865 For example when loading a value from memory into a register
16866 pair, the endian-ness does not matter. Provided that the value
16867 from the lower memory address is put into the lower numbered
16868 register, and the value from the higher address is put into the
16869 higher numbered register, the load will work regardless of whether
16870 the value being loaded is big-wordian or little-wordian. The
16871 order of the two register loads can matter however, if the address
16872 of the memory location is actually held in one of the registers
16873 being overwritten by the load.
16875 The 'Q' and 'R' constraints are also available for 64-bit
16876 constants. */
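/* For example, with a DImode value held in {r0, r1} on a little-endian
   target (WORDS_BIG_ENDIAN false), %Q prints r0, %R prints r1 and %H also
   prints r1.  */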
16877 case 'Q':
16878 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16880 rtx part = gen_lowpart (SImode, x);
16881 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16882 return;
16885 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16887 output_operand_lossage ("invalid operand for code '%c'", code);
16888 return;
16891 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16892 return;
16894 case 'R':
16895 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16897 enum machine_mode mode = GET_MODE (x);
16898 rtx part;
16900 if (mode == VOIDmode)
16901 mode = DImode;
16902 part = gen_highpart_mode (SImode, mode, x);
16903 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16904 return;
16907 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16909 output_operand_lossage ("invalid operand for code '%c'", code);
16910 return;
16913 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16914 return;
16916 case 'H':
16917 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16919 output_operand_lossage ("invalid operand for code '%c'", code);
16920 return;
16923 asm_fprintf (stream, "%r", REGNO (x) + 1);
16924 return;
16926 case 'J':
16927 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16929 output_operand_lossage ("invalid operand for code '%c'", code);
16930 return;
16933 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16934 return;
16936 case 'K':
16937 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16939 output_operand_lossage ("invalid operand for code '%c'", code);
16940 return;
16943 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16944 return;
16946 case 'm':
16947 asm_fprintf (stream, "%r",
16948 GET_CODE (XEXP (x, 0)) == REG
16949 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16950 return;
16952 case 'M':
16953 asm_fprintf (stream, "{%r-%r}",
16954 REGNO (x),
16955 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16956 return;
16958 /* Like 'M', but writing doubleword vector registers, for use by Neon
16959 insns. */
16960 case 'h':
16962 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16963 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16964 if (numregs == 1)
16965 asm_fprintf (stream, "{d%d}", regno);
16966 else
16967 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16969 return;
16971 case 'd':
16972 /* CONST_TRUE_RTX means always -- that's the default. */
16973 if (x == const_true_rtx)
16974 return;
16976 if (!COMPARISON_P (x))
16978 output_operand_lossage ("invalid operand for code '%c'", code);
16979 return;
16982 fputs (arm_condition_codes[get_arm_condition_code (x)],
16983 stream);
16984 return;
16986 case 'D':
16987 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16988 want to do that. */
16989 if (x == const_true_rtx)
16991 output_operand_lossage ("instruction never executed");
16992 return;
16994 if (!COMPARISON_P (x))
16996 output_operand_lossage ("invalid operand for code '%c'", code);
16997 return;
17000 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17001 (get_arm_condition_code (x))],
17002 stream);
17003 return;
17005 case 's':
17006 case 'V':
17007 case 'W':
17008 case 'X':
17009 case 'Y':
17010 case 'Z':
17011 /* Former Maverick support, removed after GCC-4.7. */
17012 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17013 return;
17015 case 'U':
17016 if (GET_CODE (x) != REG
17017 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17018 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17019 /* Bad value for wCG register number. */
17021 output_operand_lossage ("invalid operand for code '%c'", code);
17022 return;
17025 else
17026 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17027 return;
17029 /* Print an iWMMXt control register name. */
17030 case 'w':
17031 if (GET_CODE (x) != CONST_INT
17032 || INTVAL (x) < 0
17033 || INTVAL (x) >= 16)
17034 /* Bad value for wC register number. */
17036 output_operand_lossage ("invalid operand for code '%c'", code);
17037 return;
17040 else
17042 static const char * wc_reg_names [16] =
17044 "wCID", "wCon", "wCSSF", "wCASF",
17045 "wC4", "wC5", "wC6", "wC7",
17046 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17047 "wC12", "wC13", "wC14", "wC15"
17050 fprintf (stream, wc_reg_names [INTVAL (x)]);
17052 return;
17054 /* Print the high single-precision register of a VFP double-precision
17055 register. */
17056 case 'p':
17058 int mode = GET_MODE (x);
17059 int regno;
17061 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17063 output_operand_lossage ("invalid operand for code '%c'", code);
17064 return;
17067 regno = REGNO (x);
17068 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17070 output_operand_lossage ("invalid operand for code '%c'", code);
17071 return;
17074 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17076 return;
17078 /* Print a VFP/Neon double precision or quad precision register name. */
17079 case 'P':
17080 case 'q':
17082 int mode = GET_MODE (x);
17083 int is_quad = (code == 'q');
17084 int regno;
17086 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17088 output_operand_lossage ("invalid operand for code '%c'", code);
17089 return;
17092 if (GET_CODE (x) != REG
17093 || !IS_VFP_REGNUM (REGNO (x)))
17095 output_operand_lossage ("invalid operand for code '%c'", code);
17096 return;
17099 regno = REGNO (x);
17100 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17101 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17103 output_operand_lossage ("invalid operand for code '%c'", code);
17104 return;
17107 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17108 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17110 return;
17112 /* These two codes print the low/high doubleword register of a Neon quad
17113 register, respectively. For pair-structure types, they can also print
17114 low/high quadword registers. */
17115 case 'e':
17116 case 'f':
17118 int mode = GET_MODE (x);
17119 int regno;
17121 if ((GET_MODE_SIZE (mode) != 16
17122 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17124 output_operand_lossage ("invalid operand for code '%c'", code);
17125 return;
17128 regno = REGNO (x);
17129 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17131 output_operand_lossage ("invalid operand for code '%c'", code);
17132 return;
17135 if (GET_MODE_SIZE (mode) == 16)
17136 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17137 + (code == 'f' ? 1 : 0));
17138 else
17139 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17140 + (code == 'f' ? 1 : 0));
17142 return;
17144 /* Print a VFPv3 floating-point constant, represented as an integer
17145 index. */
17146 case 'G':
17148 int index = vfp3_const_double_index (x);
17149 gcc_assert (index != -1);
17150 fprintf (stream, "%d", index);
17152 return;
17154 /* Print bits representing opcode features for Neon.
17156 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17157 and polynomials as unsigned.
17159 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17161 Bit 2 is 1 for rounding functions, 0 otherwise. */
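/* For example, an operand with INTVAL 1 (signed integer) prints 's' for %T,
   'i' for %F and 's' for %t, while INTVAL 2 (polynomial) prints 'p', 'p'
   and 'u' respectively.  */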
17163 /* Identify the type as 's', 'u', 'p' or 'f'. */
17164 case 'T':
17166 HOST_WIDE_INT bits = INTVAL (x);
17167 fputc ("uspf"[bits & 3], stream);
17169 return;
17171 /* Likewise, but signed and unsigned integers are both 'i'. */
17172 case 'F':
17174 HOST_WIDE_INT bits = INTVAL (x);
17175 fputc ("iipf"[bits & 3], stream);
17177 return;
17179 /* As for 'T', but emit 'u' instead of 'p'. */
17180 case 't':
17182 HOST_WIDE_INT bits = INTVAL (x);
17183 fputc ("usuf"[bits & 3], stream);
17185 return;
17187 /* Bit 2: rounding (vs none). */
17188 case 'O':
17190 HOST_WIDE_INT bits = INTVAL (x);
17191 fputs ((bits & 4) != 0 ? "r" : "", stream);
17193 return;
17195 /* Memory operand for vld1/vst1 instruction. */
17196 case 'A':
17198 rtx addr;
17199 bool postinc = FALSE;
17200 unsigned align, memsize, align_bits;
17202 gcc_assert (GET_CODE (x) == MEM);
17203 addr = XEXP (x, 0);
17204 if (GET_CODE (addr) == POST_INC)
17206 postinc = 1;
17207 addr = XEXP (addr, 0);
17209 asm_fprintf (stream, "[%r", REGNO (addr));
17211 /* We know the alignment of this access, so we can emit a hint in the
17212 instruction (for some alignments) as an aid to the memory subsystem
17213 of the target. */
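/* For example, a 16-byte access known to be 128-bit aligned comes out as
   something like "[r0:128]", with a trailing "!" appended below when the
   address is post-incremented.  */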
17214 align = MEM_ALIGN (x) >> 3;
17215 memsize = MEM_SIZE (x);
17217 /* Only certain alignment specifiers are supported by the hardware. */
17218 if (memsize == 32 && (align % 32) == 0)
17219 align_bits = 256;
17220 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17221 align_bits = 128;
17222 else if (memsize >= 8 && (align % 8) == 0)
17223 align_bits = 64;
17224 else
17225 align_bits = 0;
17227 if (align_bits != 0)
17228 asm_fprintf (stream, ":%d", align_bits);
17230 asm_fprintf (stream, "]");
17232 if (postinc)
17233 fputs("!", stream);
17235 return;
17237 case 'C':
17239 rtx addr;
17241 gcc_assert (GET_CODE (x) == MEM);
17242 addr = XEXP (x, 0);
17243 gcc_assert (GET_CODE (addr) == REG);
17244 asm_fprintf (stream, "[%r]", REGNO (addr));
17246 return;
17248 /* Translate an S register number into a D register number and element index. */
17249 case 'y':
17251 int mode = GET_MODE (x);
17252 int regno;
17254 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17256 output_operand_lossage ("invalid operand for code '%c'", code);
17257 return;
17260 regno = REGNO (x);
17261 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17263 output_operand_lossage ("invalid operand for code '%c'", code);
17264 return;
17267 regno = regno - FIRST_VFP_REGNUM;
17268 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17270 return;
17272 case 'v':
17273 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17274 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17275 return;
17277 /* Register specifier for vld1.16/vst1.16. Translate the S register
17278 number into a D register number and element index. */
17279 case 'z':
17281 int mode = GET_MODE (x);
17282 int regno;
17284 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17286 output_operand_lossage ("invalid operand for code '%c'", code);
17287 return;
17290 regno = REGNO (x);
17291 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17293 output_operand_lossage ("invalid operand for code '%c'", code);
17294 return;
17297 regno = regno - FIRST_VFP_REGNUM;
17298 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17300 return;
17302 default:
17303 if (x == 0)
17305 output_operand_lossage ("missing operand");
17306 return;
17309 switch (GET_CODE (x))
17311 case REG:
17312 asm_fprintf (stream, "%r", REGNO (x));
17313 break;
17315 case MEM:
17316 output_memory_reference_mode = GET_MODE (x);
17317 output_address (XEXP (x, 0));
17318 break;
17320 case CONST_DOUBLE:
17321 if (TARGET_NEON)
17323 char fpstr[20];
17324 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17325 sizeof (fpstr), 0, 1);
17326 fprintf (stream, "#%s", fpstr);
17328 else
17329 fprintf (stream, "#%s", fp_immediate_constant (x));
17330 break;
17332 default:
17333 gcc_assert (GET_CODE (x) != NEG);
17334 fputc ('#', stream);
17335 if (GET_CODE (x) == HIGH)
17337 fputs (":lower16:", stream);
17338 x = XEXP (x, 0);
17341 output_addr_const (stream, x);
17342 break;
17347 /* Target hook for printing a memory address. */
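/* For example, an address of the form (plus (reg r0) (const_int 4)) prints
   as "[r0, #4]", a (post_inc (reg r0)) address prints as "[r0], #4" for a
   word-sized access, and a base plus shifted index prints as something like
   "[r0, r1, lsl #2]".  */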
17348 static void
17349 arm_print_operand_address (FILE *stream, rtx x)
17351 if (TARGET_32BIT)
17353 int is_minus = GET_CODE (x) == MINUS;
17355 if (GET_CODE (x) == REG)
17356 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17357 else if (GET_CODE (x) == PLUS || is_minus)
17359 rtx base = XEXP (x, 0);
17360 rtx index = XEXP (x, 1);
17361 HOST_WIDE_INT offset = 0;
17362 if (GET_CODE (base) != REG
17363 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17365 /* Ensure that BASE is a register. */
17366 /* (one of them must be). */
17367 /* Also ensure the SP is not used as an index register. */
17368 rtx temp = base;
17369 base = index;
17370 index = temp;
17372 switch (GET_CODE (index))
17374 case CONST_INT:
17375 offset = INTVAL (index);
17376 if (is_minus)
17377 offset = -offset;
17378 asm_fprintf (stream, "[%r, #%wd]",
17379 REGNO (base), offset);
17380 break;
17382 case REG:
17383 asm_fprintf (stream, "[%r, %s%r]",
17384 REGNO (base), is_minus ? "-" : "",
17385 REGNO (index));
17386 break;
17388 case MULT:
17389 case ASHIFTRT:
17390 case LSHIFTRT:
17391 case ASHIFT:
17392 case ROTATERT:
17394 asm_fprintf (stream, "[%r, %s%r",
17395 REGNO (base), is_minus ? "-" : "",
17396 REGNO (XEXP (index, 0)));
17397 arm_print_operand (stream, index, 'S');
17398 fputs ("]", stream);
17399 break;
17402 default:
17403 gcc_unreachable ();
17406 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17407 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17409 extern enum machine_mode output_memory_reference_mode;
17411 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17413 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17414 asm_fprintf (stream, "[%r, #%s%d]!",
17415 REGNO (XEXP (x, 0)),
17416 GET_CODE (x) == PRE_DEC ? "-" : "",
17417 GET_MODE_SIZE (output_memory_reference_mode));
17418 else
17419 asm_fprintf (stream, "[%r], #%s%d",
17420 REGNO (XEXP (x, 0)),
17421 GET_CODE (x) == POST_DEC ? "-" : "",
17422 GET_MODE_SIZE (output_memory_reference_mode));
17424 else if (GET_CODE (x) == PRE_MODIFY)
17426 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17427 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17428 asm_fprintf (stream, "#%wd]!",
17429 INTVAL (XEXP (XEXP (x, 1), 1)));
17430 else
17431 asm_fprintf (stream, "%r]!",
17432 REGNO (XEXP (XEXP (x, 1), 1)));
17434 else if (GET_CODE (x) == POST_MODIFY)
17436 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17437 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17438 asm_fprintf (stream, "#%wd",
17439 INTVAL (XEXP (XEXP (x, 1), 1)));
17440 else
17441 asm_fprintf (stream, "%r",
17442 REGNO (XEXP (XEXP (x, 1), 1)));
17444 else output_addr_const (stream, x);
17446 else
17448 if (GET_CODE (x) == REG)
17449 asm_fprintf (stream, "[%r]", REGNO (x));
17450 else if (GET_CODE (x) == POST_INC)
17451 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17452 else if (GET_CODE (x) == PLUS)
17454 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17455 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17456 asm_fprintf (stream, "[%r, #%wd]",
17457 REGNO (XEXP (x, 0)),
17458 INTVAL (XEXP (x, 1)));
17459 else
17460 asm_fprintf (stream, "[%r, %r]",
17461 REGNO (XEXP (x, 0)),
17462 REGNO (XEXP (x, 1)));
17464 else
17465 output_addr_const (stream, x);
17469 /* Target hook for indicating whether a punctuation character for
17470 TARGET_PRINT_OPERAND is valid. */
17471 static bool
17472 arm_print_operand_punct_valid_p (unsigned char code)
17474 return (code == '@' || code == '|' || code == '.'
17475 || code == '(' || code == ')' || code == '#'
17476 || (TARGET_32BIT && (code == '?'))
17477 || (TARGET_THUMB2 && (code == '!'))
17478 || (TARGET_THUMB && (code == '_')));
17481 /* Target hook for assembling integer objects. The ARM version needs to
17482 handle word-sized values specially. */
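/* For example, a word-sized SYMBOL_REF in a PIC constant table comes out as
   "\t.word\tfoo(GOTOFF)" for a local symbol, or with "(GOT)" for a non-local
   symbol (or when targeting VxWorks RTP).  */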
17483 static bool
17484 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17486 enum machine_mode mode;
17488 if (size == UNITS_PER_WORD && aligned_p)
17490 fputs ("\t.word\t", asm_out_file);
17491 output_addr_const (asm_out_file, x);
17493 /* Mark symbols as position independent. We only do this in the
17494 .text segment, not in the .data segment. */
17495 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17496 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17498 /* See legitimize_pic_address for an explanation of the
17499 TARGET_VXWORKS_RTP check. */
17500 if (TARGET_VXWORKS_RTP
17501 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17502 fputs ("(GOT)", asm_out_file);
17503 else
17504 fputs ("(GOTOFF)", asm_out_file);
17506 fputc ('\n', asm_out_file);
17507 return true;
17510 mode = GET_MODE (x);
17512 if (arm_vector_mode_supported_p (mode))
17514 int i, units;
17516 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17518 units = CONST_VECTOR_NUNITS (x);
17519 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17522 for (i = 0; i < units; i++)
17524 rtx elt = CONST_VECTOR_ELT (x, i);
17525 assemble_integer
17526 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17528 else
17529 for (i = 0; i < units; i++)
17531 rtx elt = CONST_VECTOR_ELT (x, i);
17532 REAL_VALUE_TYPE rval;
17534 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17536 assemble_real
17537 (rval, GET_MODE_INNER (mode),
17538 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17541 return true;
17544 return default_assemble_integer (x, size, aligned_p);
17547 static void
17548 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17550 section *s;
17552 if (!TARGET_AAPCS_BASED)
17554 (is_ctor ?
17555 default_named_section_asm_out_constructor
17556 : default_named_section_asm_out_destructor) (symbol, priority);
17557 return;
17560 /* Put these in the .init_array section, using a special relocation. */
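/* For example, a constructor with priority 123 is placed in a section named
   ".init_array.00123", and each entry is emitted as "\t.word\t<symbol>(target1)",
   target1 being the ARM-specific relocation used for these entries.  */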
17561 if (priority != DEFAULT_INIT_PRIORITY)
17563 char buf[18];
17564 sprintf (buf, "%s.%.5u",
17565 is_ctor ? ".init_array" : ".fini_array",
17566 priority);
17567 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17569 else if (is_ctor)
17570 s = ctors_section;
17571 else
17572 s = dtors_section;
17574 switch_to_section (s);
17575 assemble_align (POINTER_SIZE);
17576 fputs ("\t.word\t", asm_out_file);
17577 output_addr_const (asm_out_file, symbol);
17578 fputs ("(target1)\n", asm_out_file);
17581 /* Add a function to the list of static constructors. */
17583 static void
17584 arm_elf_asm_constructor (rtx symbol, int priority)
17586 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17589 /* Add a function to the list of static destructors. */
17591 static void
17592 arm_elf_asm_destructor (rtx symbol, int priority)
17594 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17597 /* A finite state machine takes care of noticing whether or not instructions
17598 can be conditionally executed, thus decreasing execution time and code
17599 size by deleting branch instructions. The fsm is controlled by
17600 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17602 /* The states of the fsm controlling condition codes are:
17603 0: normal, do nothing special
17604 1: make ASM_OUTPUT_OPCODE not output this instruction
17605 2: make ASM_OUTPUT_OPCODE not output this instruction
17606 3: make instructions conditional
17607 4: make instructions conditional
17609 State transitions (state->state by whom under condition):
17610 0 -> 1 final_prescan_insn if the `target' is a label
17611 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17612 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17613 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17614 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17615 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17616 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17617 (the target insn is arm_target_insn).
17619 If the jump clobbers the conditions then we use states 2 and 4.
17621 A similar thing can be done with conditional return insns.
17623 XXX In case the `target' is an unconditional branch, this conditionalising
17624 of the instructions always reduces code size, but not always execution
17625 time. But then, I want to reduce the code size to somewhere near what
17626 /bin/cc produces. */
17628 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17629 instructions. When a COND_EXEC instruction is seen the subsequent
17630 instructions are scanned so that multiple conditional instructions can be
17631 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17632 specify the length and true/false mask for the IT block. These will be
17633 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
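/* Worked example, assuming each insn has a ce_count of 1: for four
   consecutive COND_EXEC insns predicated eq, eq, ne, eq, the scan in
   thumb2_final_prescan_insn leaves arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 4 and arm_condexec_mask == 0xb (one bit per insn,
   set when that insn uses the block condition rather than its inverse), so
   thumb2_asm_output_opcode prints "ittet eq" in front of the first insn.  */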
17635 /* Returns the index of the ARM condition code string in
17636 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17637 COMPARISON should be an rtx like `(eq (...) (...))'. */
17639 enum arm_cond_code
17640 maybe_get_arm_condition_code (rtx comparison)
17642 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17643 enum arm_cond_code code;
17644 enum rtx_code comp_code = GET_CODE (comparison);
17646 if (GET_MODE_CLASS (mode) != MODE_CC)
17647 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17648 XEXP (comparison, 1));
17650 switch (mode)
17652 case CC_DNEmode: code = ARM_NE; goto dominance;
17653 case CC_DEQmode: code = ARM_EQ; goto dominance;
17654 case CC_DGEmode: code = ARM_GE; goto dominance;
17655 case CC_DGTmode: code = ARM_GT; goto dominance;
17656 case CC_DLEmode: code = ARM_LE; goto dominance;
17657 case CC_DLTmode: code = ARM_LT; goto dominance;
17658 case CC_DGEUmode: code = ARM_CS; goto dominance;
17659 case CC_DGTUmode: code = ARM_HI; goto dominance;
17660 case CC_DLEUmode: code = ARM_LS; goto dominance;
17661 case CC_DLTUmode: code = ARM_CC;
17663 dominance:
17664 if (comp_code == EQ)
17665 return ARM_INVERSE_CONDITION_CODE (code);
17666 if (comp_code == NE)
17667 return code;
17668 return ARM_NV;
17670 case CC_NOOVmode:
17671 switch (comp_code)
17673 case NE: return ARM_NE;
17674 case EQ: return ARM_EQ;
17675 case GE: return ARM_PL;
17676 case LT: return ARM_MI;
17677 default: return ARM_NV;
17680 case CC_Zmode:
17681 switch (comp_code)
17683 case NE: return ARM_NE;
17684 case EQ: return ARM_EQ;
17685 default: return ARM_NV;
17688 case CC_Nmode:
17689 switch (comp_code)
17691 case NE: return ARM_MI;
17692 case EQ: return ARM_PL;
17693 default: return ARM_NV;
17696 case CCFPEmode:
17697 case CCFPmode:
17698 /* We can handle all cases except UNEQ and LTGT. */
17699 switch (comp_code)
17701 case GE: return ARM_GE;
17702 case GT: return ARM_GT;
17703 case LE: return ARM_LS;
17704 case LT: return ARM_MI;
17705 case NE: return ARM_NE;
17706 case EQ: return ARM_EQ;
17707 case ORDERED: return ARM_VC;
17708 case UNORDERED: return ARM_VS;
17709 case UNLT: return ARM_LT;
17710 case UNLE: return ARM_LE;
17711 case UNGT: return ARM_HI;
17712 case UNGE: return ARM_PL;
17713 /* UNEQ and LTGT do not have a representation. */
17714 case UNEQ: /* Fall through. */
17715 case LTGT: /* Fall through. */
17716 default: return ARM_NV;
17719 case CC_SWPmode:
17720 switch (comp_code)
17722 case NE: return ARM_NE;
17723 case EQ: return ARM_EQ;
17724 case GE: return ARM_LE;
17725 case GT: return ARM_LT;
17726 case LE: return ARM_GE;
17727 case LT: return ARM_GT;
17728 case GEU: return ARM_LS;
17729 case GTU: return ARM_CC;
17730 case LEU: return ARM_CS;
17731 case LTU: return ARM_HI;
17732 default: return ARM_NV;
17735 case CC_Cmode:
17736 switch (comp_code)
17738 case LTU: return ARM_CS;
17739 case GEU: return ARM_CC;
17740 default: return ARM_NV;
17743 case CC_CZmode:
17744 switch (comp_code)
17746 case NE: return ARM_NE;
17747 case EQ: return ARM_EQ;
17748 case GEU: return ARM_CS;
17749 case GTU: return ARM_HI;
17750 case LEU: return ARM_LS;
17751 case LTU: return ARM_CC;
17752 default: return ARM_NV;
17755 case CC_NCVmode:
17756 switch (comp_code)
17758 case GE: return ARM_GE;
17759 case LT: return ARM_LT;
17760 case GEU: return ARM_CS;
17761 case LTU: return ARM_CC;
17762 default: return ARM_NV;
17765 case CCmode:
17766 switch (comp_code)
17768 case NE: return ARM_NE;
17769 case EQ: return ARM_EQ;
17770 case GE: return ARM_GE;
17771 case GT: return ARM_GT;
17772 case LE: return ARM_LE;
17773 case LT: return ARM_LT;
17774 case GEU: return ARM_CS;
17775 case GTU: return ARM_HI;
17776 case LEU: return ARM_LS;
17777 case LTU: return ARM_CC;
17778 default: return ARM_NV;
17781 default: gcc_unreachable ();
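/* Concrete example (sketch): a comparison rtx such as

	(ge (reg:CC_NOOV CC_REGNUM) (const_int 0))

   selects the CC_NOOVmode case above and yields ARM_PL, i.e. the "pl"
   suffix, since only the N flag can be trusted in that mode.  */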
17785 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17786 static enum arm_cond_code
17787 get_arm_condition_code (rtx comparison)
17789 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17790 gcc_assert (code != ARM_NV);
17791 return code;
17794 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17795 instructions. */
17796 void
17797 thumb2_final_prescan_insn (rtx insn)
17799 rtx first_insn = insn;
17800 rtx body = PATTERN (insn);
17801 rtx predicate;
17802 enum arm_cond_code code;
17803 int n;
17804 int mask;
17806 /* Remove the previous insn from the count of insns to be output. */
17807 if (arm_condexec_count)
17808 arm_condexec_count--;
17810 /* Nothing to do if we are already inside a conditional block. */
17811 if (arm_condexec_count)
17812 return;
17814 if (GET_CODE (body) != COND_EXEC)
17815 return;
17817 /* Conditional jumps are implemented directly. */
17818 if (GET_CODE (insn) == JUMP_INSN)
17819 return;
17821 predicate = COND_EXEC_TEST (body);
17822 arm_current_cc = get_arm_condition_code (predicate);
17824 n = get_attr_ce_count (insn);
17825 arm_condexec_count = 1;
17826 arm_condexec_mask = (1 << n) - 1;
17827 arm_condexec_masklen = n;
17828 /* See if subsequent instructions can be combined into the same block. */
17829 for (;;)
17831 insn = next_nonnote_insn (insn);
17833 /* Jumping into the middle of an IT block is illegal, so a label or
17834 barrier terminates the block. */
17835 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17836 break;
17838 body = PATTERN (insn);
17839 /* USE and CLOBBER aren't really insns, so just skip them. */
17840 if (GET_CODE (body) == USE
17841 || GET_CODE (body) == CLOBBER)
17842 continue;
17844 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17845 if (GET_CODE (body) != COND_EXEC)
17846 break;
17847 /* Allow up to 4 conditionally executed instructions in a block. */
17848 n = get_attr_ce_count (insn);
17849 if (arm_condexec_masklen + n > 4)
17850 break;
17852 predicate = COND_EXEC_TEST (body);
17853 code = get_arm_condition_code (predicate);
17854 mask = (1 << n) - 1;
17855 if (arm_current_cc == code)
17856 arm_condexec_mask |= (mask << arm_condexec_masklen);
17857 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17858 break;
17860 arm_condexec_count++;
17861 arm_condexec_masklen += n;
17863 /* A jump must be the last instruction in a conditional block. */
17864 if (GET_CODE(insn) == JUMP_INSN)
17865 break;
17867 /* Restore recog_data (getting the attributes of other insns can
17868 destroy this array, but final.c assumes that it remains intact
17869 across this call). */
17870 extract_constrain_insn_cached (first_insn);
17873 void
17874 arm_final_prescan_insn (rtx insn)
17876 /* BODY will hold the body of INSN. */
17877 rtx body = PATTERN (insn);
17879 /* This will be 1 if trying to repeat the trick, and things need to be
17880 reversed if it appears to fail. */
17881 int reverse = 0;
17883 /* If we start with a return insn, we only succeed if we find another one. */
17884 int seeking_return = 0;
17885 enum rtx_code return_code = UNKNOWN;
17887 /* START_INSN will hold the insn from where we start looking. This is the
17888 first insn after the following code_label if REVERSE is true. */
17889 rtx start_insn = insn;
17891 /* If in state 4, check if the target branch is reached, in order to
17892 change back to state 0. */
17893 if (arm_ccfsm_state == 4)
17895 if (insn == arm_target_insn)
17897 arm_target_insn = NULL;
17898 arm_ccfsm_state = 0;
17900 return;
17903 /* If in state 3, it is possible to repeat the trick if this insn is an
17904 unconditional branch to a label, the previous target label (which is
17905 used only once) immediately follows this branch, and the label this
17906 branch jumps to is not too far off. */
17907 if (arm_ccfsm_state == 3)
17909 if (simplejump_p (insn))
17911 start_insn = next_nonnote_insn (start_insn);
17912 if (GET_CODE (start_insn) == BARRIER)
17914 /* XXX Isn't this always a barrier? */
17915 start_insn = next_nonnote_insn (start_insn);
17917 if (GET_CODE (start_insn) == CODE_LABEL
17918 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17919 && LABEL_NUSES (start_insn) == 1)
17920 reverse = TRUE;
17921 else
17922 return;
17924 else if (ANY_RETURN_P (body))
17926 start_insn = next_nonnote_insn (start_insn);
17927 if (GET_CODE (start_insn) == BARRIER)
17928 start_insn = next_nonnote_insn (start_insn);
17929 if (GET_CODE (start_insn) == CODE_LABEL
17930 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17931 && LABEL_NUSES (start_insn) == 1)
17933 reverse = TRUE;
17934 seeking_return = 1;
17935 return_code = GET_CODE (body);
17937 else
17938 return;
17940 else
17941 return;
17944 gcc_assert (!arm_ccfsm_state || reverse);
17945 if (GET_CODE (insn) != JUMP_INSN)
17946 return;
17948 /* This jump might be paralleled with a clobber of the condition codes;
17949 the jump should always come first. */
17950 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17951 body = XVECEXP (body, 0, 0);
17953 if (reverse
17954 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17955 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17957 int insns_skipped;
17958 int fail = FALSE, succeed = FALSE;
17959 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17960 int then_not_else = TRUE;
17961 rtx this_insn = start_insn, label = 0;
17963 /* Register the insn jumped to. */
17964 if (reverse)
17966 if (!seeking_return)
17967 label = XEXP (SET_SRC (body), 0);
17969 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17970 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17971 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17973 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17974 then_not_else = FALSE;
17976 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
17978 seeking_return = 1;
17979 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
17981 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
17983 seeking_return = 1;
17984 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
17985 then_not_else = FALSE;
17987 else
17988 gcc_unreachable ();
17990 /* See how many insns this branch skips, and what kind of insns. If all
17991 insns are okay, and the label or unconditional branch to the same
17992 label is not too far away, succeed. */
17993 for (insns_skipped = 0;
17994 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17996 rtx scanbody;
17998 this_insn = next_nonnote_insn (this_insn);
17999 if (!this_insn)
18000 break;
18002 switch (GET_CODE (this_insn))
18004 case CODE_LABEL:
18005 /* Succeed if it is the target label, otherwise fail since
18006 control falls in from somewhere else. */
18007 if (this_insn == label)
18009 arm_ccfsm_state = 1;
18010 succeed = TRUE;
18012 else
18013 fail = TRUE;
18014 break;
18016 case BARRIER:
18017 /* Succeed if the following insn is the target label.
18018 Otherwise fail.
18019 If return insns are used then the last insn in a function
18020 will be a barrier. */
18021 this_insn = next_nonnote_insn (this_insn);
18022 if (this_insn && this_insn == label)
18024 arm_ccfsm_state = 1;
18025 succeed = TRUE;
18027 else
18028 fail = TRUE;
18029 break;
18031 case CALL_INSN:
18032 /* The AAPCS says that conditional calls should not be
18033 used since they make interworking inefficient (the
18034 linker can't transform BL<cond> into BLX). That's
18035 only a problem if the machine has BLX. */
18036 if (arm_arch5)
18038 fail = TRUE;
18039 break;
18042 /* Succeed if the following insn is the target label, or
18043 if the following two insns are a barrier and the
18044 target label. */
18045 this_insn = next_nonnote_insn (this_insn);
18046 if (this_insn && GET_CODE (this_insn) == BARRIER)
18047 this_insn = next_nonnote_insn (this_insn);
18049 if (this_insn && this_insn == label
18050 && insns_skipped < max_insns_skipped)
18052 arm_ccfsm_state = 1;
18053 succeed = TRUE;
18055 else
18056 fail = TRUE;
18057 break;
18059 case JUMP_INSN:
18060 /* If this is an unconditional branch to the same label, succeed.
18061 If it is to another label, do nothing. If it is conditional,
18062 fail. */
18063 /* XXX Probably, the tests for SET and the PC are
18064 unnecessary. */
18066 scanbody = PATTERN (this_insn);
18067 if (GET_CODE (scanbody) == SET
18068 && GET_CODE (SET_DEST (scanbody)) == PC)
18070 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18071 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18073 arm_ccfsm_state = 2;
18074 succeed = TRUE;
18076 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18077 fail = TRUE;
18079 /* Fail if a conditional return is undesirable (e.g. on a
18080 StrongARM), but still allow this if optimizing for size. */
18081 else if (GET_CODE (scanbody) == return_code
18082 && !use_return_insn (TRUE, NULL)
18083 && !optimize_size)
18084 fail = TRUE;
18085 else if (GET_CODE (scanbody) == return_code)
18087 arm_ccfsm_state = 2;
18088 succeed = TRUE;
18090 else if (GET_CODE (scanbody) == PARALLEL)
18092 switch (get_attr_conds (this_insn))
18094 case CONDS_NOCOND:
18095 break;
18096 default:
18097 fail = TRUE;
18098 break;
18101 else
18102 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18104 break;
18106 case INSN:
18107 /* Instructions using or affecting the condition codes make it
18108 fail. */
18109 scanbody = PATTERN (this_insn);
18110 if (!(GET_CODE (scanbody) == SET
18111 || GET_CODE (scanbody) == PARALLEL)
18112 || get_attr_conds (this_insn) != CONDS_NOCOND)
18113 fail = TRUE;
18114 break;
18116 default:
18117 break;
18120 if (succeed)
18122 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18123 arm_target_label = CODE_LABEL_NUMBER (label);
18124 else
18126 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18128 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18130 this_insn = next_nonnote_insn (this_insn);
18131 gcc_assert (!this_insn
18132 || (GET_CODE (this_insn) != BARRIER
18133 && GET_CODE (this_insn) != CODE_LABEL));
18135 if (!this_insn)
18137 /* Oh, dear! We ran off the end; give up. */
18138 extract_constrain_insn_cached (insn);
18139 arm_ccfsm_state = 0;
18140 arm_target_insn = NULL;
18141 return;
18143 arm_target_insn = this_insn;
18146 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18147 what it was. */
18148 if (!reverse)
18149 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18151 if (reverse || then_not_else)
18152 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18155 /* Restore recog_data (getting the attributes of other insns can
18156 destroy this array, but final.c assumes that it remains intact
18157 across this call). */
18158 extract_constrain_insn_cached (insn);
18162 /* Output IT instructions. */
18163 void
18164 thumb2_asm_output_opcode (FILE * stream)
18166 char buff[5];
18167 int n;
18169 if (arm_condexec_mask)
18171 for (n = 0; n < arm_condexec_masklen; n++)
18172 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18173 buff[n] = 0;
18174 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18175 arm_condition_codes[arm_current_cc]);
18176 arm_condexec_mask = 0;
18180 /* Returns true if REGNO is a valid register
18181 for holding a quantity of type MODE. */
18183 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18185 if (GET_MODE_CLASS (mode) == MODE_CC)
18186 return (regno == CC_REGNUM
18187 || (TARGET_HARD_FLOAT && TARGET_VFP
18188 && regno == VFPCC_REGNUM));
18190 if (TARGET_THUMB1)
18191 /* For the Thumb we only allow values bigger than SImode in
18192 registers 0 - 6, so that there is always a second low
18193 register available to hold the upper part of the value.
18194 We probably ought to ensure that the register is the
18195 start of an even numbered register pair. */
18196 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18198 if (TARGET_HARD_FLOAT && TARGET_VFP
18199 && IS_VFP_REGNUM (regno))
18201 if (mode == SFmode || mode == SImode)
18202 return VFP_REGNO_OK_FOR_SINGLE (regno);
18204 if (mode == DFmode)
18205 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18207 /* VFP registers can hold HFmode values, but there is no point in
18208 putting them there unless we have hardware conversion insns. */
18209 if (mode == HFmode)
18210 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18212 if (TARGET_NEON)
18213 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18214 || (VALID_NEON_QREG_MODE (mode)
18215 && NEON_REGNO_OK_FOR_QUAD (regno))
18216 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18217 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18218 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18219 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18220 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18222 return FALSE;
18225 if (TARGET_REALLY_IWMMXT)
18227 if (IS_IWMMXT_GR_REGNUM (regno))
18228 return mode == SImode;
18230 if (IS_IWMMXT_REGNUM (regno))
18231 return VALID_IWMMXT_REG_MODE (mode);
18234 /* We allow almost any value to be stored in the general registers.
18235 Restrict doubleword quantities to even register pairs so that we can
18236 use ldrd. Do not allow very large Neon structure opaque modes in
18237 general registers; they would use too many. */
18238 if (regno <= LAST_ARM_REGNUM)
18239 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18240 && ARM_NUM_REGS (mode) <= 4;
18242 if (regno == FRAME_POINTER_REGNUM
18243 || regno == ARG_POINTER_REGNUM)
18244 /* We only allow integers in the fake hard registers. */
18245 return GET_MODE_CLASS (mode) == MODE_INT;
18247 return FALSE;
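/* Two illustrative consequences of the checks above: with TARGET_LDRD a
   DImode value may be placed in r0 or r2 but not in r1, so that ldrd/strd
   remain usable, while a large opaque Neon structure mode such as OImode
   (eight words) is kept out of the core registers entirely because
   ARM_NUM_REGS (OImode) > 4.  */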
18250 /* Implement MODES_TIEABLE_P. */
18252 bool
18253 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18255 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18256 return true;
18258 /* We specifically want to allow elements of "structure" modes to
18259 be tieable to the structure. This more general condition allows
18260 other rarer situations too. */
18261 if (TARGET_NEON
18262 && (VALID_NEON_DREG_MODE (mode1)
18263 || VALID_NEON_QREG_MODE (mode1)
18264 || VALID_NEON_STRUCT_MODE (mode1))
18265 && (VALID_NEON_DREG_MODE (mode2)
18266 || VALID_NEON_QREG_MODE (mode2)
18267 || VALID_NEON_STRUCT_MODE (mode2)))
18268 return true;
18270 return false;
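/* Example of the Neon clause above: V2DImode and TImode fall into different
   mode classes, but V2DImode is a valid quad-register vector mode and TImode
   is one of the opaque structure modes, so the two are reported as tieable
   and moves between a structure and its element vectors stay cheap.  */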
18273 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18274 not used in arm mode. */
18276 enum reg_class
18277 arm_regno_class (int regno)
18279 if (TARGET_THUMB1)
18281 if (regno == STACK_POINTER_REGNUM)
18282 return STACK_REG;
18283 if (regno == CC_REGNUM)
18284 return CC_REG;
18285 if (regno < 8)
18286 return LO_REGS;
18287 return HI_REGS;
18290 if (TARGET_THUMB2 && regno < 8)
18291 return LO_REGS;
18293 if ( regno <= LAST_ARM_REGNUM
18294 || regno == FRAME_POINTER_REGNUM
18295 || regno == ARG_POINTER_REGNUM)
18296 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18298 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18299 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18301 if (IS_VFP_REGNUM (regno))
18303 if (regno <= D7_VFP_REGNUM)
18304 return VFP_D0_D7_REGS;
18305 else if (regno <= LAST_LO_VFP_REGNUM)
18306 return VFP_LO_REGS;
18307 else
18308 return VFP_HI_REGS;
18311 if (IS_IWMMXT_REGNUM (regno))
18312 return IWMMXT_REGS;
18314 if (IS_IWMMXT_GR_REGNUM (regno))
18315 return IWMMXT_GR_REGS;
18317 return NO_REGS;
18320 /* Handle a special case when computing the offset
18321 of an argument from the frame pointer. */
18323 arm_debugger_arg_offset (int value, rtx addr)
18325 rtx insn;
18327 /* We are only interested if dbxout_parms() failed to compute the offset. */
18328 if (value != 0)
18329 return 0;
18331 /* We can only cope with the case where the address is held in a register. */
18332 if (GET_CODE (addr) != REG)
18333 return 0;
18335 /* If we are using the frame pointer to point at the argument, then
18336 an offset of 0 is correct. */
18337 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18338 return 0;
18340 /* If we are using the stack pointer to point at the
18341 argument, then an offset of 0 is correct. */
18342 /* ??? Check this is consistent with thumb2 frame layout. */
18343 if ((TARGET_THUMB || !frame_pointer_needed)
18344 && REGNO (addr) == SP_REGNUM)
18345 return 0;
18347 /* Oh dear. The argument is pointed to by a register rather
18348 than being held in a register, or being stored at a known
18349 offset from the frame pointer. Since GDB only understands
18350 those two kinds of argument we must translate the address
18351 held in the register into an offset from the frame pointer.
18352 We do this by searching through the insns for the function
18353 looking to see where this register gets its value. If the
18354 register is initialized from the frame pointer plus an offset
18355 then we are in luck and we can continue, otherwise we give up.
18357 This code is exercised by producing debugging information
18358 for a function with arguments like this:
18360 double func (double a, double b, int c, double d) {return d;}
18362 Without this code the stab for parameter 'd' will be set to
18363 an offset of 0 from the frame pointer, rather than 8. */
18365 /* The if() statement says:
18367 If the insn is a normal instruction
18368 and if the insn is setting the value in a register
18369 and if the register being set is the register holding the address of the argument
18370 and if the address is computed by an addition
18371 that involves adding to a register
18372 which is the frame pointer
18373 a constant integer
18375 then... */
18377 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18379 if ( GET_CODE (insn) == INSN
18380 && GET_CODE (PATTERN (insn)) == SET
18381 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18382 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18383 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18384 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18385 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18388 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18390 break;
18394 if (value == 0)
18396 debug_rtx (addr);
18397 warning (0, "unable to compute real location of stacked parameter");
18398 value = 8; /* XXX magic hack */
18401 return value;
18404 typedef enum {
18405 T_V8QI,
18406 T_V4HI,
18407 T_V2SI,
18408 T_V2SF,
18409 T_DI,
18410 T_V16QI,
18411 T_V8HI,
18412 T_V4SI,
18413 T_V4SF,
18414 T_V2DI,
18415 T_TI,
18416 T_EI,
18417 T_OI,
18418 T_MAX /* Size of enum. Keep last. */
18419 } neon_builtin_type_mode;
18421 #define TYPE_MODE_BIT(X) (1 << (X))
18423 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18424 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18425 | TYPE_MODE_BIT (T_DI))
18426 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18427 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18428 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18430 #define v8qi_UP T_V8QI
18431 #define v4hi_UP T_V4HI
18432 #define v2si_UP T_V2SI
18433 #define v2sf_UP T_V2SF
18434 #define di_UP T_DI
18435 #define v16qi_UP T_V16QI
18436 #define v8hi_UP T_V8HI
18437 #define v4si_UP T_V4SI
18438 #define v4sf_UP T_V4SF
18439 #define v2di_UP T_V2DI
18440 #define ti_UP T_TI
18441 #define ei_UP T_EI
18442 #define oi_UP T_OI
18444 #define UP(X) X##_UP
18446 typedef enum {
18447 NEON_BINOP,
18448 NEON_TERNOP,
18449 NEON_UNOP,
18450 NEON_GETLANE,
18451 NEON_SETLANE,
18452 NEON_CREATE,
18453 NEON_DUP,
18454 NEON_DUPLANE,
18455 NEON_COMBINE,
18456 NEON_SPLIT,
18457 NEON_LANEMUL,
18458 NEON_LANEMULL,
18459 NEON_LANEMULH,
18460 NEON_LANEMAC,
18461 NEON_SCALARMUL,
18462 NEON_SCALARMULL,
18463 NEON_SCALARMULH,
18464 NEON_SCALARMAC,
18465 NEON_CONVERT,
18466 NEON_FIXCONV,
18467 NEON_SELECT,
18468 NEON_RESULTPAIR,
18469 NEON_REINTERP,
18470 NEON_VTBL,
18471 NEON_VTBX,
18472 NEON_LOAD1,
18473 NEON_LOAD1LANE,
18474 NEON_STORE1,
18475 NEON_STORE1LANE,
18476 NEON_LOADSTRUCT,
18477 NEON_LOADSTRUCTLANE,
18478 NEON_STORESTRUCT,
18479 NEON_STORESTRUCTLANE,
18480 NEON_LOGICBINOP,
18481 NEON_SHIFTINSERT,
18482 NEON_SHIFTIMM,
18483 NEON_SHIFTACC
18484 } neon_itype;
18486 typedef struct {
18487 const char *name;
18488 const neon_itype itype;
18489 const neon_builtin_type_mode mode;
18490 const enum insn_code code;
18491 unsigned int fcode;
18492 } neon_builtin_datum;
18494 #define CF(N,X) CODE_FOR_neon_##N##X
18496 #define VAR1(T, N, A) \
18497 {#N, NEON_##T, UP (A), CF (N, A), 0}
18498 #define VAR2(T, N, A, B) \
18499 VAR1 (T, N, A), \
18500 {#N, NEON_##T, UP (B), CF (N, B), 0}
18501 #define VAR3(T, N, A, B, C) \
18502 VAR2 (T, N, A, B), \
18503 {#N, NEON_##T, UP (C), CF (N, C), 0}
18504 #define VAR4(T, N, A, B, C, D) \
18505 VAR3 (T, N, A, B, C), \
18506 {#N, NEON_##T, UP (D), CF (N, D), 0}
18507 #define VAR5(T, N, A, B, C, D, E) \
18508 VAR4 (T, N, A, B, C, D), \
18509 {#N, NEON_##T, UP (E), CF (N, E), 0}
18510 #define VAR6(T, N, A, B, C, D, E, F) \
18511 VAR5 (T, N, A, B, C, D, E), \
18512 {#N, NEON_##T, UP (F), CF (N, F), 0}
18513 #define VAR7(T, N, A, B, C, D, E, F, G) \
18514 VAR6 (T, N, A, B, C, D, E, F), \
18515 {#N, NEON_##T, UP (G), CF (N, G), 0}
18516 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18517 VAR7 (T, N, A, B, C, D, E, F, G), \
18518 {#N, NEON_##T, UP (H), CF (N, H), 0}
18519 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18520 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18521 {#N, NEON_##T, UP (I), CF (N, I), 0}
18522 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18523 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18524 {#N, NEON_##T, UP (J), CF (N, J), 0}
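/* Expansion sketch: an entry such as

	VAR2 (TERNOP, vqdmlal, v4hi, v2si)

   in the table below becomes the two initializers

	{"vqdmlal", NEON_TERNOP, T_V4HI, CODE_FOR_neon_vqdmlalv4hi, 0},
	{"vqdmlal", NEON_TERNOP, T_V2SI, CODE_FOR_neon_vqdmlalv2si, 0},

   one neon_builtin_datum per "key" mode; the trailing 0 is the fcode, which
   arm_init_neon_builtins fills in when the builtins are registered.  */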
18526 /* The mode entries in the following table correspond to the "key" type of the
18527 instruction variant, i.e. equivalent to that which would be specified after
18528 the assembler mnemonic, which usually refers to the last vector operand.
18529 (Signed/unsigned/polynomial types are not differentiated between though, and
18530 are all mapped onto the same mode for a given element size.) The modes
18531 listed per instruction should be the same as those defined for that
18532 instruction's pattern in neon.md. */
18534 static neon_builtin_datum neon_builtin_data[] =
18536 VAR10 (BINOP, vadd,
18537 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18538 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18539 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18540 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18541 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18542 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18543 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18544 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18545 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18546 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18547 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18548 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18549 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18550 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18551 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18552 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18553 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18554 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18555 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18556 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18557 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18558 VAR2 (BINOP, vqdmull, v4hi, v2si),
18559 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18560 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18561 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18562 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18563 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18564 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18565 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18566 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18567 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18568 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18569 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18570 VAR10 (BINOP, vsub,
18571 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18572 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18573 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18574 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18575 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18576 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18577 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18578 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18579 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18580 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18581 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18582 VAR2 (BINOP, vcage, v2sf, v4sf),
18583 VAR2 (BINOP, vcagt, v2sf, v4sf),
18584 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18585 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18586 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18587 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18588 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18589 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18590 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18591 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18592 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18593 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18594 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18595 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18596 VAR2 (BINOP, vrecps, v2sf, v4sf),
18597 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18598 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18599 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18600 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18601 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18602 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18603 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18604 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18605 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18606 VAR2 (UNOP, vcnt, v8qi, v16qi),
18607 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18608 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18609 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18610 /* FIXME: vget_lane supports more variants than this! */
18611 VAR10 (GETLANE, vget_lane,
18612 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18613 VAR10 (SETLANE, vset_lane,
18614 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18615 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18616 VAR10 (DUP, vdup_n,
18617 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18618 VAR10 (DUPLANE, vdup_lane,
18619 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18620 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18621 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18622 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18623 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18624 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18625 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18626 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18627 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18628 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18629 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18630 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18631 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18632 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18633 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18634 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18635 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18636 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18637 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18638 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18639 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18640 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18641 VAR10 (BINOP, vext,
18642 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18643 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18644 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18645 VAR2 (UNOP, vrev16, v8qi, v16qi),
18646 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18647 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18648 VAR10 (SELECT, vbsl,
18649 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18650 VAR1 (VTBL, vtbl1, v8qi),
18651 VAR1 (VTBL, vtbl2, v8qi),
18652 VAR1 (VTBL, vtbl3, v8qi),
18653 VAR1 (VTBL, vtbl4, v8qi),
18654 VAR1 (VTBX, vtbx1, v8qi),
18655 VAR1 (VTBX, vtbx2, v8qi),
18656 VAR1 (VTBX, vtbx3, v8qi),
18657 VAR1 (VTBX, vtbx4, v8qi),
18658 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18659 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18660 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18661 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18662 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18663 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18664 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18665 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18666 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18667 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18668 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18669 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18670 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18671 VAR10 (LOAD1, vld1,
18672 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18673 VAR10 (LOAD1LANE, vld1_lane,
18674 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18675 VAR10 (LOAD1, vld1_dup,
18676 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18677 VAR10 (STORE1, vst1,
18678 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18679 VAR10 (STORE1LANE, vst1_lane,
18680 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18681 VAR9 (LOADSTRUCT,
18682 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18683 VAR7 (LOADSTRUCTLANE, vld2_lane,
18684 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18685 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18686 VAR9 (STORESTRUCT, vst2,
18687 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18688 VAR7 (STORESTRUCTLANE, vst2_lane,
18689 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18690 VAR9 (LOADSTRUCT,
18691 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18692 VAR7 (LOADSTRUCTLANE, vld3_lane,
18693 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18694 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18695 VAR9 (STORESTRUCT, vst3,
18696 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18697 VAR7 (STORESTRUCTLANE, vst3_lane,
18698 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18699 VAR9 (LOADSTRUCT, vld4,
18700 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18701 VAR7 (LOADSTRUCTLANE, vld4_lane,
18702 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18703 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18704 VAR9 (STORESTRUCT, vst4,
18705 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18706 VAR7 (STORESTRUCTLANE, vst4_lane,
18707 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18708 VAR10 (LOGICBINOP, vand,
18709 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18710 VAR10 (LOGICBINOP, vorr,
18711 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18712 VAR10 (BINOP, veor,
18713 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18714 VAR10 (LOGICBINOP, vbic,
18715 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18716 VAR10 (LOGICBINOP, vorn,
18717 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18720 #undef CF
18721 #undef VAR1
18722 #undef VAR2
18723 #undef VAR3
18724 #undef VAR4
18725 #undef VAR5
18726 #undef VAR6
18727 #undef VAR7
18728 #undef VAR8
18729 #undef VAR9
18730 #undef VAR10
18732 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18733 symbolic names defined here (which would require too much duplication).
18734 FIXME? */
18735 enum arm_builtins
18737 ARM_BUILTIN_GETWCGR0,
18738 ARM_BUILTIN_GETWCGR1,
18739 ARM_BUILTIN_GETWCGR2,
18740 ARM_BUILTIN_GETWCGR3,
18742 ARM_BUILTIN_SETWCGR0,
18743 ARM_BUILTIN_SETWCGR1,
18744 ARM_BUILTIN_SETWCGR2,
18745 ARM_BUILTIN_SETWCGR3,
18747 ARM_BUILTIN_WZERO,
18749 ARM_BUILTIN_WAVG2BR,
18750 ARM_BUILTIN_WAVG2HR,
18751 ARM_BUILTIN_WAVG2B,
18752 ARM_BUILTIN_WAVG2H,
18754 ARM_BUILTIN_WACCB,
18755 ARM_BUILTIN_WACCH,
18756 ARM_BUILTIN_WACCW,
18758 ARM_BUILTIN_WMACS,
18759 ARM_BUILTIN_WMACSZ,
18760 ARM_BUILTIN_WMACU,
18761 ARM_BUILTIN_WMACUZ,
18763 ARM_BUILTIN_WSADB,
18764 ARM_BUILTIN_WSADBZ,
18765 ARM_BUILTIN_WSADH,
18766 ARM_BUILTIN_WSADHZ,
18768 ARM_BUILTIN_WALIGNI,
18769 ARM_BUILTIN_WALIGNR0,
18770 ARM_BUILTIN_WALIGNR1,
18771 ARM_BUILTIN_WALIGNR2,
18772 ARM_BUILTIN_WALIGNR3,
18774 ARM_BUILTIN_TMIA,
18775 ARM_BUILTIN_TMIAPH,
18776 ARM_BUILTIN_TMIABB,
18777 ARM_BUILTIN_TMIABT,
18778 ARM_BUILTIN_TMIATB,
18779 ARM_BUILTIN_TMIATT,
18781 ARM_BUILTIN_TMOVMSKB,
18782 ARM_BUILTIN_TMOVMSKH,
18783 ARM_BUILTIN_TMOVMSKW,
18785 ARM_BUILTIN_TBCSTB,
18786 ARM_BUILTIN_TBCSTH,
18787 ARM_BUILTIN_TBCSTW,
18789 ARM_BUILTIN_WMADDS,
18790 ARM_BUILTIN_WMADDU,
18792 ARM_BUILTIN_WPACKHSS,
18793 ARM_BUILTIN_WPACKWSS,
18794 ARM_BUILTIN_WPACKDSS,
18795 ARM_BUILTIN_WPACKHUS,
18796 ARM_BUILTIN_WPACKWUS,
18797 ARM_BUILTIN_WPACKDUS,
18799 ARM_BUILTIN_WADDB,
18800 ARM_BUILTIN_WADDH,
18801 ARM_BUILTIN_WADDW,
18802 ARM_BUILTIN_WADDSSB,
18803 ARM_BUILTIN_WADDSSH,
18804 ARM_BUILTIN_WADDSSW,
18805 ARM_BUILTIN_WADDUSB,
18806 ARM_BUILTIN_WADDUSH,
18807 ARM_BUILTIN_WADDUSW,
18808 ARM_BUILTIN_WSUBB,
18809 ARM_BUILTIN_WSUBH,
18810 ARM_BUILTIN_WSUBW,
18811 ARM_BUILTIN_WSUBSSB,
18812 ARM_BUILTIN_WSUBSSH,
18813 ARM_BUILTIN_WSUBSSW,
18814 ARM_BUILTIN_WSUBUSB,
18815 ARM_BUILTIN_WSUBUSH,
18816 ARM_BUILTIN_WSUBUSW,
18818 ARM_BUILTIN_WAND,
18819 ARM_BUILTIN_WANDN,
18820 ARM_BUILTIN_WOR,
18821 ARM_BUILTIN_WXOR,
18823 ARM_BUILTIN_WCMPEQB,
18824 ARM_BUILTIN_WCMPEQH,
18825 ARM_BUILTIN_WCMPEQW,
18826 ARM_BUILTIN_WCMPGTUB,
18827 ARM_BUILTIN_WCMPGTUH,
18828 ARM_BUILTIN_WCMPGTUW,
18829 ARM_BUILTIN_WCMPGTSB,
18830 ARM_BUILTIN_WCMPGTSH,
18831 ARM_BUILTIN_WCMPGTSW,
18833 ARM_BUILTIN_TEXTRMSB,
18834 ARM_BUILTIN_TEXTRMSH,
18835 ARM_BUILTIN_TEXTRMSW,
18836 ARM_BUILTIN_TEXTRMUB,
18837 ARM_BUILTIN_TEXTRMUH,
18838 ARM_BUILTIN_TEXTRMUW,
18839 ARM_BUILTIN_TINSRB,
18840 ARM_BUILTIN_TINSRH,
18841 ARM_BUILTIN_TINSRW,
18843 ARM_BUILTIN_WMAXSW,
18844 ARM_BUILTIN_WMAXSH,
18845 ARM_BUILTIN_WMAXSB,
18846 ARM_BUILTIN_WMAXUW,
18847 ARM_BUILTIN_WMAXUH,
18848 ARM_BUILTIN_WMAXUB,
18849 ARM_BUILTIN_WMINSW,
18850 ARM_BUILTIN_WMINSH,
18851 ARM_BUILTIN_WMINSB,
18852 ARM_BUILTIN_WMINUW,
18853 ARM_BUILTIN_WMINUH,
18854 ARM_BUILTIN_WMINUB,
18856 ARM_BUILTIN_WMULUM,
18857 ARM_BUILTIN_WMULSM,
18858 ARM_BUILTIN_WMULUL,
18860 ARM_BUILTIN_PSADBH,
18861 ARM_BUILTIN_WSHUFH,
18863 ARM_BUILTIN_WSLLH,
18864 ARM_BUILTIN_WSLLW,
18865 ARM_BUILTIN_WSLLD,
18866 ARM_BUILTIN_WSRAH,
18867 ARM_BUILTIN_WSRAW,
18868 ARM_BUILTIN_WSRAD,
18869 ARM_BUILTIN_WSRLH,
18870 ARM_BUILTIN_WSRLW,
18871 ARM_BUILTIN_WSRLD,
18872 ARM_BUILTIN_WRORH,
18873 ARM_BUILTIN_WRORW,
18874 ARM_BUILTIN_WRORD,
18875 ARM_BUILTIN_WSLLHI,
18876 ARM_BUILTIN_WSLLWI,
18877 ARM_BUILTIN_WSLLDI,
18878 ARM_BUILTIN_WSRAHI,
18879 ARM_BUILTIN_WSRAWI,
18880 ARM_BUILTIN_WSRADI,
18881 ARM_BUILTIN_WSRLHI,
18882 ARM_BUILTIN_WSRLWI,
18883 ARM_BUILTIN_WSRLDI,
18884 ARM_BUILTIN_WRORHI,
18885 ARM_BUILTIN_WRORWI,
18886 ARM_BUILTIN_WRORDI,
18888 ARM_BUILTIN_WUNPCKIHB,
18889 ARM_BUILTIN_WUNPCKIHH,
18890 ARM_BUILTIN_WUNPCKIHW,
18891 ARM_BUILTIN_WUNPCKILB,
18892 ARM_BUILTIN_WUNPCKILH,
18893 ARM_BUILTIN_WUNPCKILW,
18895 ARM_BUILTIN_WUNPCKEHSB,
18896 ARM_BUILTIN_WUNPCKEHSH,
18897 ARM_BUILTIN_WUNPCKEHSW,
18898 ARM_BUILTIN_WUNPCKEHUB,
18899 ARM_BUILTIN_WUNPCKEHUH,
18900 ARM_BUILTIN_WUNPCKEHUW,
18901 ARM_BUILTIN_WUNPCKELSB,
18902 ARM_BUILTIN_WUNPCKELSH,
18903 ARM_BUILTIN_WUNPCKELSW,
18904 ARM_BUILTIN_WUNPCKELUB,
18905 ARM_BUILTIN_WUNPCKELUH,
18906 ARM_BUILTIN_WUNPCKELUW,
18908 ARM_BUILTIN_WABSB,
18909 ARM_BUILTIN_WABSH,
18910 ARM_BUILTIN_WABSW,
18912 ARM_BUILTIN_WADDSUBHX,
18913 ARM_BUILTIN_WSUBADDHX,
18915 ARM_BUILTIN_WABSDIFFB,
18916 ARM_BUILTIN_WABSDIFFH,
18917 ARM_BUILTIN_WABSDIFFW,
18919 ARM_BUILTIN_WADDCH,
18920 ARM_BUILTIN_WADDCW,
18922 ARM_BUILTIN_WAVG4,
18923 ARM_BUILTIN_WAVG4R,
18925 ARM_BUILTIN_WMADDSX,
18926 ARM_BUILTIN_WMADDUX,
18928 ARM_BUILTIN_WMADDSN,
18929 ARM_BUILTIN_WMADDUN,
18931 ARM_BUILTIN_WMULWSM,
18932 ARM_BUILTIN_WMULWUM,
18934 ARM_BUILTIN_WMULWSMR,
18935 ARM_BUILTIN_WMULWUMR,
18937 ARM_BUILTIN_WMULWL,
18939 ARM_BUILTIN_WMULSMR,
18940 ARM_BUILTIN_WMULUMR,
18942 ARM_BUILTIN_WQMULM,
18943 ARM_BUILTIN_WQMULMR,
18945 ARM_BUILTIN_WQMULWM,
18946 ARM_BUILTIN_WQMULWMR,
18948 ARM_BUILTIN_WADDBHUSM,
18949 ARM_BUILTIN_WADDBHUSL,
18951 ARM_BUILTIN_WQMIABB,
18952 ARM_BUILTIN_WQMIABT,
18953 ARM_BUILTIN_WQMIATB,
18954 ARM_BUILTIN_WQMIATT,
18956 ARM_BUILTIN_WQMIABBN,
18957 ARM_BUILTIN_WQMIABTN,
18958 ARM_BUILTIN_WQMIATBN,
18959 ARM_BUILTIN_WQMIATTN,
18961 ARM_BUILTIN_WMIABB,
18962 ARM_BUILTIN_WMIABT,
18963 ARM_BUILTIN_WMIATB,
18964 ARM_BUILTIN_WMIATT,
18966 ARM_BUILTIN_WMIABBN,
18967 ARM_BUILTIN_WMIABTN,
18968 ARM_BUILTIN_WMIATBN,
18969 ARM_BUILTIN_WMIATTN,
18971 ARM_BUILTIN_WMIAWBB,
18972 ARM_BUILTIN_WMIAWBT,
18973 ARM_BUILTIN_WMIAWTB,
18974 ARM_BUILTIN_WMIAWTT,
18976 ARM_BUILTIN_WMIAWBBN,
18977 ARM_BUILTIN_WMIAWBTN,
18978 ARM_BUILTIN_WMIAWTBN,
18979 ARM_BUILTIN_WMIAWTTN,
18981 ARM_BUILTIN_WMERGE,
18983 ARM_BUILTIN_THREAD_POINTER,
18985 ARM_BUILTIN_NEON_BASE,
18987 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18990 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18992 static void
18993 arm_init_neon_builtins (void)
18995 unsigned int i, fcode;
18996 tree decl;
18998 tree neon_intQI_type_node;
18999 tree neon_intHI_type_node;
19000 tree neon_polyQI_type_node;
19001 tree neon_polyHI_type_node;
19002 tree neon_intSI_type_node;
19003 tree neon_intDI_type_node;
19004 tree neon_float_type_node;
19006 tree intQI_pointer_node;
19007 tree intHI_pointer_node;
19008 tree intSI_pointer_node;
19009 tree intDI_pointer_node;
19010 tree float_pointer_node;
19012 tree const_intQI_node;
19013 tree const_intHI_node;
19014 tree const_intSI_node;
19015 tree const_intDI_node;
19016 tree const_float_node;
19018 tree const_intQI_pointer_node;
19019 tree const_intHI_pointer_node;
19020 tree const_intSI_pointer_node;
19021 tree const_intDI_pointer_node;
19022 tree const_float_pointer_node;
19024 tree V8QI_type_node;
19025 tree V4HI_type_node;
19026 tree V2SI_type_node;
19027 tree V2SF_type_node;
19028 tree V16QI_type_node;
19029 tree V8HI_type_node;
19030 tree V4SI_type_node;
19031 tree V4SF_type_node;
19032 tree V2DI_type_node;
19034 tree intUQI_type_node;
19035 tree intUHI_type_node;
19036 tree intUSI_type_node;
19037 tree intUDI_type_node;
19039 tree intEI_type_node;
19040 tree intOI_type_node;
19041 tree intCI_type_node;
19042 tree intXI_type_node;
19044 tree V8QI_pointer_node;
19045 tree V4HI_pointer_node;
19046 tree V2SI_pointer_node;
19047 tree V2SF_pointer_node;
19048 tree V16QI_pointer_node;
19049 tree V8HI_pointer_node;
19050 tree V4SI_pointer_node;
19051 tree V4SF_pointer_node;
19052 tree V2DI_pointer_node;
19054 tree void_ftype_pv8qi_v8qi_v8qi;
19055 tree void_ftype_pv4hi_v4hi_v4hi;
19056 tree void_ftype_pv2si_v2si_v2si;
19057 tree void_ftype_pv2sf_v2sf_v2sf;
19058 tree void_ftype_pdi_di_di;
19059 tree void_ftype_pv16qi_v16qi_v16qi;
19060 tree void_ftype_pv8hi_v8hi_v8hi;
19061 tree void_ftype_pv4si_v4si_v4si;
19062 tree void_ftype_pv4sf_v4sf_v4sf;
19063 tree void_ftype_pv2di_v2di_v2di;
19065 tree reinterp_ftype_dreg[5][5];
19066 tree reinterp_ftype_qreg[5][5];
19067 tree dreg_types[5], qreg_types[5];
19069 /* Create distinguished type nodes for NEON vector element types,
19070 and pointers to values of such types, so we can detect them later. */
19071 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19072 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19073 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19074 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19075 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19076 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19077 neon_float_type_node = make_node (REAL_TYPE);
19078 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19079 layout_type (neon_float_type_node);
19081 /* Define typedefs which exactly correspond to the modes we are basing vector
19082 types on. If you change these names you'll need to change
19083 the table used by arm_mangle_type too. */
19084 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19085 "__builtin_neon_qi");
19086 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19087 "__builtin_neon_hi");
19088 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19089 "__builtin_neon_si");
19090 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19091 "__builtin_neon_sf");
19092 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19093 "__builtin_neon_di");
19094 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19095 "__builtin_neon_poly8");
19096 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19097 "__builtin_neon_poly16");
19099 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19100 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19101 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19102 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19103 float_pointer_node = build_pointer_type (neon_float_type_node);
19105 /* Next create constant-qualified versions of the above types. */
19106 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19107 TYPE_QUAL_CONST);
19108 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19109 TYPE_QUAL_CONST);
19110 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19111 TYPE_QUAL_CONST);
19112 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19113 TYPE_QUAL_CONST);
19114 const_float_node = build_qualified_type (neon_float_type_node,
19115 TYPE_QUAL_CONST);
19117 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19118 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19119 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19120 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19121 const_float_pointer_node = build_pointer_type (const_float_node);
19123 /* Now create vector types based on our NEON element types. */
19124 /* 64-bit vectors. */
19125 V8QI_type_node =
19126 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19127 V4HI_type_node =
19128 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19129 V2SI_type_node =
19130 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19131 V2SF_type_node =
19132 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19133 /* 128-bit vectors. */
19134 V16QI_type_node =
19135 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19136 V8HI_type_node =
19137 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19138 V4SI_type_node =
19139 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19140 V4SF_type_node =
19141 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19142 V2DI_type_node =
19143 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19145 /* Unsigned integer types for various mode sizes. */
19146 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19147 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19148 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19149 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19151 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19152 "__builtin_neon_uqi");
19153 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19154 "__builtin_neon_uhi");
19155 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19156 "__builtin_neon_usi");
19157 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19158 "__builtin_neon_udi");
19160 /* Opaque integer types for structures of vectors. */
19161 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19162 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19163 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19164 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19166 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19167 "__builtin_neon_ti");
19168 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19169 "__builtin_neon_ei");
19170 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19171 "__builtin_neon_oi");
19172 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19173 "__builtin_neon_ci");
19174 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19175 "__builtin_neon_xi");
19177 /* Pointers to vector types. */
19178 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19179 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19180 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19181 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19182 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19183 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19184 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19185 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19186 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19188 /* Operations which return results as pairs. */
19189 void_ftype_pv8qi_v8qi_v8qi =
19190 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19191 V8QI_type_node, NULL);
19192 void_ftype_pv4hi_v4hi_v4hi =
19193 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19194 V4HI_type_node, NULL);
19195 void_ftype_pv2si_v2si_v2si =
19196 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19197 V2SI_type_node, NULL);
19198 void_ftype_pv2sf_v2sf_v2sf =
19199 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19200 V2SF_type_node, NULL);
19201 void_ftype_pdi_di_di =
19202 build_function_type_list (void_type_node, intDI_pointer_node,
19203 neon_intDI_type_node, neon_intDI_type_node, NULL);
19204 void_ftype_pv16qi_v16qi_v16qi =
19205 build_function_type_list (void_type_node, V16QI_pointer_node,
19206 V16QI_type_node, V16QI_type_node, NULL);
19207 void_ftype_pv8hi_v8hi_v8hi =
19208 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19209 V8HI_type_node, NULL);
19210 void_ftype_pv4si_v4si_v4si =
19211 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19212 V4SI_type_node, NULL);
19213 void_ftype_pv4sf_v4sf_v4sf =
19214 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19215 V4SF_type_node, NULL);
19216 void_ftype_pv2di_v2di_v2di =
19217 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19218 V2DI_type_node, NULL);
19220 dreg_types[0] = V8QI_type_node;
19221 dreg_types[1] = V4HI_type_node;
19222 dreg_types[2] = V2SI_type_node;
19223 dreg_types[3] = V2SF_type_node;
19224 dreg_types[4] = neon_intDI_type_node;
19226 qreg_types[0] = V16QI_type_node;
19227 qreg_types[1] = V8HI_type_node;
19228 qreg_types[2] = V4SI_type_node;
19229 qreg_types[3] = V4SF_type_node;
19230 qreg_types[4] = V2DI_type_node;
19232 for (i = 0; i < 5; i++)
19234 int j;
19235 for (j = 0; j < 5; j++)
19237 reinterp_ftype_dreg[i][j]
19238 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19239 reinterp_ftype_qreg[i][j]
19240 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19244 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19245 i < ARRAY_SIZE (neon_builtin_data);
19246 i++, fcode++)
19248 neon_builtin_datum *d = &neon_builtin_data[i];
19250 const char* const modenames[] = {
19251 "v8qi", "v4hi", "v2si", "v2sf", "di",
19252 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19253 "ti", "ei", "oi"
19255 char namebuf[60];
19256 tree ftype = NULL;
19257 int is_load = 0, is_store = 0;
19259 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19261 d->fcode = fcode;
19263 switch (d->itype)
19265 case NEON_LOAD1:
19266 case NEON_LOAD1LANE:
19267 case NEON_LOADSTRUCT:
19268 case NEON_LOADSTRUCTLANE:
19269 is_load = 1;
19270 /* Fall through. */
19271 case NEON_STORE1:
19272 case NEON_STORE1LANE:
19273 case NEON_STORESTRUCT:
19274 case NEON_STORESTRUCTLANE:
19275 if (!is_load)
19276 is_store = 1;
19277 /* Fall through. */
19278 case NEON_UNOP:
19279 case NEON_BINOP:
19280 case NEON_LOGICBINOP:
19281 case NEON_SHIFTINSERT:
19282 case NEON_TERNOP:
19283 case NEON_GETLANE:
19284 case NEON_SETLANE:
19285 case NEON_CREATE:
19286 case NEON_DUP:
19287 case NEON_DUPLANE:
19288 case NEON_SHIFTIMM:
19289 case NEON_SHIFTACC:
19290 case NEON_COMBINE:
19291 case NEON_SPLIT:
19292 case NEON_CONVERT:
19293 case NEON_FIXCONV:
19294 case NEON_LANEMUL:
19295 case NEON_LANEMULL:
19296 case NEON_LANEMULH:
19297 case NEON_LANEMAC:
19298 case NEON_SCALARMUL:
19299 case NEON_SCALARMULL:
19300 case NEON_SCALARMULH:
19301 case NEON_SCALARMAC:
19302 case NEON_SELECT:
19303 case NEON_VTBL:
19304 case NEON_VTBX:
19306 int k;
19307 tree return_type = void_type_node, args = void_list_node;
19309 /* Build a function type directly from the insn_data for
19310 this builtin. The build_function_type() function takes
19311 care of removing duplicates for us. */
19312 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19314 tree eltype;
19316 if (is_load && k == 1)
19318 /* Neon load patterns always have the memory
19319 operand in the operand 1 position. */
19320 gcc_assert (insn_data[d->code].operand[k].predicate
19321 == neon_struct_operand);
19323 switch (d->mode)
19325 case T_V8QI:
19326 case T_V16QI:
19327 eltype = const_intQI_pointer_node;
19328 break;
19330 case T_V4HI:
19331 case T_V8HI:
19332 eltype = const_intHI_pointer_node;
19333 break;
19335 case T_V2SI:
19336 case T_V4SI:
19337 eltype = const_intSI_pointer_node;
19338 break;
19340 case T_V2SF:
19341 case T_V4SF:
19342 eltype = const_float_pointer_node;
19343 break;
19345 case T_DI:
19346 case T_V2DI:
19347 eltype = const_intDI_pointer_node;
19348 break;
19350 default: gcc_unreachable ();
19353 else if (is_store && k == 0)
19355 /* Similarly, Neon store patterns use operand 0 as
19356 the memory location to store to. */
19357 gcc_assert (insn_data[d->code].operand[k].predicate
19358 == neon_struct_operand);
19360 switch (d->mode)
19362 case T_V8QI:
19363 case T_V16QI:
19364 eltype = intQI_pointer_node;
19365 break;
19367 case T_V4HI:
19368 case T_V8HI:
19369 eltype = intHI_pointer_node;
19370 break;
19372 case T_V2SI:
19373 case T_V4SI:
19374 eltype = intSI_pointer_node;
19375 break;
19377 case T_V2SF:
19378 case T_V4SF:
19379 eltype = float_pointer_node;
19380 break;
19382 case T_DI:
19383 case T_V2DI:
19384 eltype = intDI_pointer_node;
19385 break;
19387 default: gcc_unreachable ();
19390 else
19392 switch (insn_data[d->code].operand[k].mode)
19394 case VOIDmode: eltype = void_type_node; break;
19395 /* Scalars. */
19396 case QImode: eltype = neon_intQI_type_node; break;
19397 case HImode: eltype = neon_intHI_type_node; break;
19398 case SImode: eltype = neon_intSI_type_node; break;
19399 case SFmode: eltype = neon_float_type_node; break;
19400 case DImode: eltype = neon_intDI_type_node; break;
19401 case TImode: eltype = intTI_type_node; break;
19402 case EImode: eltype = intEI_type_node; break;
19403 case OImode: eltype = intOI_type_node; break;
19404 case CImode: eltype = intCI_type_node; break;
19405 case XImode: eltype = intXI_type_node; break;
19406 /* 64-bit vectors. */
19407 case V8QImode: eltype = V8QI_type_node; break;
19408 case V4HImode: eltype = V4HI_type_node; break;
19409 case V2SImode: eltype = V2SI_type_node; break;
19410 case V2SFmode: eltype = V2SF_type_node; break;
19411 /* 128-bit vectors. */
19412 case V16QImode: eltype = V16QI_type_node; break;
19413 case V8HImode: eltype = V8HI_type_node; break;
19414 case V4SImode: eltype = V4SI_type_node; break;
19415 case V4SFmode: eltype = V4SF_type_node; break;
19416 case V2DImode: eltype = V2DI_type_node; break;
19417 default: gcc_unreachable ();
19421 if (k == 0 && !is_store)
19422 return_type = eltype;
19423 else
19424 args = tree_cons (NULL_TREE, eltype, args);
19427 ftype = build_function_type (return_type, args);
19429 break;
19431 case NEON_RESULTPAIR:
19433 switch (insn_data[d->code].operand[1].mode)
19435 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19436 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19437 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19438 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19439 case DImode: ftype = void_ftype_pdi_di_di; break;
19440 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19441 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19442 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19443 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19444 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19445 default: gcc_unreachable ();
19448 break;
19450 case NEON_REINTERP:
19452 /* We iterate over 5 doubleword types, then 5 quadword
19453 types. */
19454 int rhs = d->mode % 5;
19455 switch (insn_data[d->code].operand[0].mode)
19457 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19458 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19459 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19460 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19461 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19462 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19463 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19464 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19465 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19466 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19467 default: gcc_unreachable ();
19470 break;
19472 default:
19473 gcc_unreachable ();
19476 gcc_assert (ftype != NULL);
19478 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19480 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19481 NULL_TREE);
19482 arm_builtin_decls[fcode] = decl;
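  /* Illustrative sketch, not part of the original sources: the loop above
     derives each builtin's name by appending the mode suffix to the table
     entry's base name, so a hypothetical binop entry named "vadd" built in
     V8QImode would be registered as "__builtin_neon_vaddv8qi".  Such a
     builtin could then be reached from C roughly as

         int8x8_t r = __builtin_neon_vaddv8qi (a, b, 1);

     where a and b are int8x8_t values and the trailing constant selects the
     operation variant, matching the NEON_ARG_COPY_TO_REG / NEON_ARG_CONSTANT
     shape that the NEON_BINOP case of arm_expand_neon_builtin expects below.
     The authoritative names and signatures come from neon_builtin_data and
     the matching insn_data entries, not from this sketch.  */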
19486 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19487 do \
19489 if ((MASK) & insn_flags) \
19491 tree bdecl; \
19492 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19493 BUILT_IN_MD, NULL, NULL_TREE); \
19494 arm_builtin_decls[CODE] = bdecl; \
19497 while (0)
19499 struct builtin_description
19501 const unsigned int mask;
19502 const enum insn_code icode;
19503 const char * const name;
19504 const enum arm_builtins code;
19505 const enum rtx_code comparison;
19506 const unsigned int flag;
19509 static const struct builtin_description bdesc_2arg[] =
19511 #define IWMMXT_BUILTIN(code, string, builtin) \
19512 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19513 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19515 #define IWMMXT2_BUILTIN(code, string, builtin) \
19516 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19517 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19519 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19520 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19521 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19522 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19523 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19524 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19525 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19526 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19527 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19528 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19529 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19530 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19531 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19532 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19533 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19534 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19535 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19536 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19537 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19538 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19539 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19540 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19541 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19542 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19543 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19544 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19545 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19546 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19547 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19548 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19549 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19550 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19551 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19552 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19553 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19554 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19555 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19556 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19557 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19558 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19559 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19560 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19561 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19562 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19563 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19564 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19565 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19566 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19567 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19568 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19569 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19570 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19571 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19572 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19573 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19574 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19575 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19576 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19577 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19578 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19579 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19580 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19581 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19582 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19583 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19584 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19585 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19586 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19587 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19588 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19589 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19590 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19591 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19592 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19593 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19594 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19595 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19596 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19598 #define IWMMXT_BUILTIN2(code, builtin) \
19599 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19601 #define IWMMXT2_BUILTIN2(code, builtin) \
19602 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19604 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19605 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19606 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19607 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19608 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19609 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19610 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19611 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19612 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19613 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19616 static const struct builtin_description bdesc_1arg[] =
19618 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19619 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19620 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19621 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19622 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19623 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19624 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19625 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19626 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19627 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19628 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19629 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19630 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19631 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19632 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19633 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19634 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19635 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19636 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19637 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19638 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19639 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19640 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19641 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19644 /* Set up all the iWMMXt builtins. This is not called if
19645 TARGET_IWMMXT is zero. */
19647 static void
19648 arm_init_iwmmxt_builtins (void)
19650 const struct builtin_description * d;
19651 size_t i;
19653 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19654 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19655 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19657 tree v8qi_ftype_v8qi_v8qi_int
19658 = build_function_type_list (V8QI_type_node,
19659 V8QI_type_node, V8QI_type_node,
19660 integer_type_node, NULL_TREE);
19661 tree v4hi_ftype_v4hi_int
19662 = build_function_type_list (V4HI_type_node,
19663 V4HI_type_node, integer_type_node, NULL_TREE);
19664 tree v2si_ftype_v2si_int
19665 = build_function_type_list (V2SI_type_node,
19666 V2SI_type_node, integer_type_node, NULL_TREE);
19667 tree v2si_ftype_di_di
19668 = build_function_type_list (V2SI_type_node,
19669 long_long_integer_type_node,
19670 long_long_integer_type_node,
19671 NULL_TREE);
19672 tree di_ftype_di_int
19673 = build_function_type_list (long_long_integer_type_node,
19674 long_long_integer_type_node,
19675 integer_type_node, NULL_TREE);
19676 tree di_ftype_di_int_int
19677 = build_function_type_list (long_long_integer_type_node,
19678 long_long_integer_type_node,
19679 integer_type_node,
19680 integer_type_node, NULL_TREE);
19681 tree int_ftype_v8qi
19682 = build_function_type_list (integer_type_node,
19683 V8QI_type_node, NULL_TREE);
19684 tree int_ftype_v4hi
19685 = build_function_type_list (integer_type_node,
19686 V4HI_type_node, NULL_TREE);
19687 tree int_ftype_v2si
19688 = build_function_type_list (integer_type_node,
19689 V2SI_type_node, NULL_TREE);
19690 tree int_ftype_v8qi_int
19691 = build_function_type_list (integer_type_node,
19692 V8QI_type_node, integer_type_node, NULL_TREE);
19693 tree int_ftype_v4hi_int
19694 = build_function_type_list (integer_type_node,
19695 V4HI_type_node, integer_type_node, NULL_TREE);
19696 tree int_ftype_v2si_int
19697 = build_function_type_list (integer_type_node,
19698 V2SI_type_node, integer_type_node, NULL_TREE);
19699 tree v8qi_ftype_v8qi_int_int
19700 = build_function_type_list (V8QI_type_node,
19701 V8QI_type_node, integer_type_node,
19702 integer_type_node, NULL_TREE);
19703 tree v4hi_ftype_v4hi_int_int
19704 = build_function_type_list (V4HI_type_node,
19705 V4HI_type_node, integer_type_node,
19706 integer_type_node, NULL_TREE);
19707 tree v2si_ftype_v2si_int_int
19708 = build_function_type_list (V2SI_type_node,
19709 V2SI_type_node, integer_type_node,
19710 integer_type_node, NULL_TREE);
19711 /* Miscellaneous. */
19712 tree v8qi_ftype_v4hi_v4hi
19713 = build_function_type_list (V8QI_type_node,
19714 V4HI_type_node, V4HI_type_node, NULL_TREE);
19715 tree v4hi_ftype_v2si_v2si
19716 = build_function_type_list (V4HI_type_node,
19717 V2SI_type_node, V2SI_type_node, NULL_TREE);
19718 tree v8qi_ftype_v4hi_v8qi
19719 = build_function_type_list (V8QI_type_node,
19720 V4HI_type_node, V8QI_type_node, NULL_TREE);
19721 tree v2si_ftype_v4hi_v4hi
19722 = build_function_type_list (V2SI_type_node,
19723 V4HI_type_node, V4HI_type_node, NULL_TREE);
19724 tree v2si_ftype_v8qi_v8qi
19725 = build_function_type_list (V2SI_type_node,
19726 V8QI_type_node, V8QI_type_node, NULL_TREE);
19727 tree v4hi_ftype_v4hi_di
19728 = build_function_type_list (V4HI_type_node,
19729 V4HI_type_node, long_long_integer_type_node,
19730 NULL_TREE);
19731 tree v2si_ftype_v2si_di
19732 = build_function_type_list (V2SI_type_node,
19733 V2SI_type_node, long_long_integer_type_node,
19734 NULL_TREE);
19735 tree di_ftype_void
19736 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19737 tree int_ftype_void
19738 = build_function_type_list (integer_type_node, NULL_TREE);
19739 tree di_ftype_v8qi
19740 = build_function_type_list (long_long_integer_type_node,
19741 V8QI_type_node, NULL_TREE);
19742 tree di_ftype_v4hi
19743 = build_function_type_list (long_long_integer_type_node,
19744 V4HI_type_node, NULL_TREE);
19745 tree di_ftype_v2si
19746 = build_function_type_list (long_long_integer_type_node,
19747 V2SI_type_node, NULL_TREE);
19748 tree v2si_ftype_v4hi
19749 = build_function_type_list (V2SI_type_node,
19750 V4HI_type_node, NULL_TREE);
19751 tree v4hi_ftype_v8qi
19752 = build_function_type_list (V4HI_type_node,
19753 V8QI_type_node, NULL_TREE);
19754 tree v8qi_ftype_v8qi
19755 = build_function_type_list (V8QI_type_node,
19756 V8QI_type_node, NULL_TREE);
19757 tree v4hi_ftype_v4hi
19758 = build_function_type_list (V4HI_type_node,
19759 V4HI_type_node, NULL_TREE);
19760 tree v2si_ftype_v2si
19761 = build_function_type_list (V2SI_type_node,
19762 V2SI_type_node, NULL_TREE);
19764 tree di_ftype_di_v4hi_v4hi
19765 = build_function_type_list (long_long_unsigned_type_node,
19766 long_long_unsigned_type_node,
19767 V4HI_type_node, V4HI_type_node,
19768 NULL_TREE);
19770 tree di_ftype_v4hi_v4hi
19771 = build_function_type_list (long_long_unsigned_type_node,
19772 V4HI_type_node, V4HI_type_node,
19773 NULL_TREE);
19775 tree v2si_ftype_v2si_v4hi_v4hi
19776 = build_function_type_list (V2SI_type_node,
19777 V2SI_type_node, V4HI_type_node,
19778 V4HI_type_node, NULL_TREE);
19780 tree v2si_ftype_v2si_v8qi_v8qi
19781 = build_function_type_list (V2SI_type_node,
19782 V2SI_type_node, V8QI_type_node,
19783 V8QI_type_node, NULL_TREE);
19785 tree di_ftype_di_v2si_v2si
19786 = build_function_type_list (long_long_unsigned_type_node,
19787 long_long_unsigned_type_node,
19788 V2SI_type_node, V2SI_type_node,
19789 NULL_TREE);
19791 tree di_ftype_di_di_int
19792 = build_function_type_list (long_long_unsigned_type_node,
19793 long_long_unsigned_type_node,
19794 long_long_unsigned_type_node,
19795 integer_type_node, NULL_TREE);
19797 tree void_ftype_int
19798 = build_function_type_list (void_type_node,
19799 integer_type_node, NULL_TREE);
19801 tree v8qi_ftype_char
19802 = build_function_type_list (V8QI_type_node,
19803 signed_char_type_node, NULL_TREE);
19805 tree v4hi_ftype_short
19806 = build_function_type_list (V4HI_type_node,
19807 short_integer_type_node, NULL_TREE);
19809 tree v2si_ftype_int
19810 = build_function_type_list (V2SI_type_node,
19811 integer_type_node, NULL_TREE);
19813 /* Normal vector binops. */
19814 tree v8qi_ftype_v8qi_v8qi
19815 = build_function_type_list (V8QI_type_node,
19816 V8QI_type_node, V8QI_type_node, NULL_TREE);
19817 tree v4hi_ftype_v4hi_v4hi
19818 = build_function_type_list (V4HI_type_node,
19819 V4HI_type_node, V4HI_type_node, NULL_TREE);
19820 tree v2si_ftype_v2si_v2si
19821 = build_function_type_list (V2SI_type_node,
19822 V2SI_type_node, V2SI_type_node, NULL_TREE);
19823 tree di_ftype_di_di
19824 = build_function_type_list (long_long_unsigned_type_node,
19825 long_long_unsigned_type_node,
19826 long_long_unsigned_type_node,
19827 NULL_TREE);
19829 /* Add all builtins that are more or less simple operations on two
19830 operands. */
19831 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19833 /* Use one of the operands; the target can have a different mode for
19834 mask-generating compares. */
19835 enum machine_mode mode;
19836 tree type;
19838 if (d->name == 0)
19839 continue;
19841 mode = insn_data[d->icode].operand[1].mode;
19843 switch (mode)
19845 case V8QImode:
19846 type = v8qi_ftype_v8qi_v8qi;
19847 break;
19848 case V4HImode:
19849 type = v4hi_ftype_v4hi_v4hi;
19850 break;
19851 case V2SImode:
19852 type = v2si_ftype_v2si_v2si;
19853 break;
19854 case DImode:
19855 type = di_ftype_di_di;
19856 break;
19858 default:
19859 gcc_unreachable ();
19862 def_mbuiltin (d->mask, d->name, type, d->code);
19865 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
19866 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19867 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19868 ARM_BUILTIN_ ## CODE)
19870 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
19871 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
19872 ARM_BUILTIN_ ## CODE)
19874 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19875 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
19876 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
19877 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
19878 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
19879 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
19880 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
19881 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
19882 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
19884 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19885 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19886 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19887 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19888 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19889 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19891 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19892 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19893 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19894 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19895 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19896 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19898 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19899 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19900 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19901 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19902 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19903 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19905 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19906 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19907 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19908 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19909 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19910 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19912 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19914 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
19915 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
19916 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
19917 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
19918 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
19919 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
19920 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
19921 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
19922 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19923 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19925 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19926 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19927 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19928 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19929 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19930 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19931 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19932 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19933 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19935 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19936 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19937 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19939 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19940 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19941 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19943 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
19944 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
19946 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19947 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19948 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19949 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19950 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19951 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19953 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19954 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19955 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19956 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19957 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19958 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19959 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19960 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19961 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19962 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19963 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19964 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19966 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19967 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19968 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19969 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19971 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
19972 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19973 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19974 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19975 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19976 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19977 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19979 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
19980 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
19981 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
19983 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
19984 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
19985 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
19986 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
19988 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
19989 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
19990 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
19991 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
19993 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
19994 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
19995 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
19996 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
19998 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
19999 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20000 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20001 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20003 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20004 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20005 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20006 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20008 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20009 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20010 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20011 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20013 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20015 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20016 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20017 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20019 #undef iwmmx_mbuiltin
20020 #undef iwmmx2_mbuiltin
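/* Illustrative sketch, not part of the original sources: the declarations
   registered above are usually reached through the mmintrin.h intrinsics,
   but the simplest of them can be called directly.  Using only signatures
   visible in this function (di_ftype_void and void_ftype_int):

       unsigned long long z = __builtin_arm_wzero ();   clears a wR register
       __builtin_arm_setwcgr0 (0);                      sets control register 0

   Both require compiling for an iWMMXt-capable target, since
   arm_init_iwmmxt_builtins is only called when TARGET_REALLY_IWMMXT holds.  */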
20023 static void
20024 arm_init_tls_builtins (void)
20026 tree ftype, decl;
20028 ftype = build_function_type (ptr_type_node, void_list_node);
20029 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20030 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20031 NULL, NULL_TREE);
20032 TREE_NOTHROW (decl) = 1;
20033 TREE_READONLY (decl) = 1;
20034 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
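/* Illustrative sketch, not part of the original sources: the builtin
   registered above takes no arguments and returns the TLS thread pointer
   as a void *, so user code such as

       void *tp = __builtin_thread_pointer ();

   expands to a read of the thread-pointer register (or the corresponding
   helper), and is marked nothrow and readonly as set just above.  */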
20037 static void
20038 arm_init_fp16_builtins (void)
20040 tree fp16_type = make_node (REAL_TYPE);
20041 TYPE_PRECISION (fp16_type) = 16;
20042 layout_type (fp16_type);
20043 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
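/* Illustrative sketch, not part of the original sources: the 16-bit real
   type registered above is what user code sees as __fp16.  Because the
   hooks further down reject __fp16 parameters and return values and promote
   it to float for arithmetic, a typical (accepted) use looks like

       float scale_by_half (float x)
       {
         __fp16 h = x;        stored in half precision
         return h * 0.5f;     h is promoted to float for the multiply
       }

   compiled with an -mfp16-format option so that arm_fp16_format is set.  */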
20046 static void
20047 arm_init_builtins (void)
20049 arm_init_tls_builtins ();
20051 if (TARGET_REALLY_IWMMXT)
20052 arm_init_iwmmxt_builtins ();
20054 if (TARGET_NEON)
20055 arm_init_neon_builtins ();
20057 if (arm_fp16_format)
20058 arm_init_fp16_builtins ();
20061 /* Return the ARM builtin for CODE. */
20063 static tree
20064 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20066 if (code >= ARM_BUILTIN_MAX)
20067 return error_mark_node;
20069 return arm_builtin_decls[code];
20072 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20074 static const char *
20075 arm_invalid_parameter_type (const_tree t)
20077 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20078 return N_("function parameters cannot have __fp16 type");
20079 return NULL;
20082 /* Implement TARGET_INVALID_RETURN_TYPE. */
20084 static const char *
20085 arm_invalid_return_type (const_tree t)
20087 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20088 return N_("functions cannot return __fp16 type");
20089 return NULL;
20092 /* Implement TARGET_PROMOTED_TYPE. */
20094 static tree
20095 arm_promoted_type (const_tree t)
20097 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20098 return float_type_node;
20099 return NULL_TREE;
20102 /* Implement TARGET_CONVERT_TO_TYPE.
20103 Specifically, this hook implements the peculiarity of the ARM
20104 half-precision floating-point C semantics that requires conversions between
20105 __fp16 and double to go through an intermediate conversion to float. */
20107 static tree
20108 arm_convert_to_type (tree type, tree expr)
20110 tree fromtype = TREE_TYPE (expr);
20111 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20112 return NULL_TREE;
20113 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20114 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20115 return convert (type, convert (float_type_node, expr));
20116 return NULL_TREE;
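/* Worked example, a sketch rather than part of the original sources: the
   hook above makes conversions between __fp16 and double go through float,
   so

       double d = 1.0 / 3.0;
       __fp16 h = d;

   behaves as if written h = (__fp16) (float) d: the value is rounded first
   to single and then to half precision, which is the ARM half-precision
   semantics the comment above refers to.  */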
20119 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20120 This simply adds HFmode as a supported mode; even though we don't
20121 implement arithmetic on this type directly, it's supported by
20122 optabs conversions, much the way the double-word arithmetic is
20123 special-cased in the default hook. */
20125 static bool
20126 arm_scalar_mode_supported_p (enum machine_mode mode)
20128 if (mode == HFmode)
20129 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20130 else if (ALL_FIXED_POINT_MODE_P (mode))
20131 return true;
20132 else
20133 return default_scalar_mode_supported_p (mode);
20136 /* Errors in the source file can cause expand_expr to return const0_rtx
20137 where we expect a vector. To avoid crashing, use one of the vector
20138 clear instructions. */
20140 static rtx
20141 safe_vector_operand (rtx x, enum machine_mode mode)
20143 if (x != const0_rtx)
20144 return x;
20145 x = gen_reg_rtx (mode);
20147 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20148 : gen_rtx_SUBREG (DImode, x, 0)));
20149 return x;
20152 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20154 static rtx
20155 arm_expand_binop_builtin (enum insn_code icode,
20156 tree exp, rtx target)
20158 rtx pat;
20159 tree arg0 = CALL_EXPR_ARG (exp, 0);
20160 tree arg1 = CALL_EXPR_ARG (exp, 1);
20161 rtx op0 = expand_normal (arg0);
20162 rtx op1 = expand_normal (arg1);
20163 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20164 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20165 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20167 if (VECTOR_MODE_P (mode0))
20168 op0 = safe_vector_operand (op0, mode0);
20169 if (VECTOR_MODE_P (mode1))
20170 op1 = safe_vector_operand (op1, mode1);
20172 if (! target
20173 || GET_MODE (target) != tmode
20174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20175 target = gen_reg_rtx (tmode);
20177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20180 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20181 op0 = copy_to_mode_reg (mode0, op0);
20182 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20183 op1 = copy_to_mode_reg (mode1, op1);
20185 pat = GEN_FCN (icode) (target, op0, op1);
20186 if (! pat)
20187 return 0;
20188 emit_insn (pat);
20189 return target;
20192 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20194 static rtx
20195 arm_expand_unop_builtin (enum insn_code icode,
20196 tree exp, rtx target, int do_load)
20198 rtx pat;
20199 tree arg0 = CALL_EXPR_ARG (exp, 0);
20200 rtx op0 = expand_normal (arg0);
20201 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20202 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20204 if (! target
20205 || GET_MODE (target) != tmode
20206 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20207 target = gen_reg_rtx (tmode);
20208 if (do_load)
20209 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20210 else
20212 if (VECTOR_MODE_P (mode0))
20213 op0 = safe_vector_operand (op0, mode0);
20215 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20216 op0 = copy_to_mode_reg (mode0, op0);
20219 pat = GEN_FCN (icode) (target, op0);
20220 if (! pat)
20221 return 0;
20222 emit_insn (pat);
20223 return target;
20226 typedef enum {
20227 NEON_ARG_COPY_TO_REG,
20228 NEON_ARG_CONSTANT,
20229 NEON_ARG_MEMORY,
20230 NEON_ARG_STOP
20231 } builtin_arg;
20233 #define NEON_MAX_BUILTIN_ARGS 5
20235 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20236 and return an expression for the accessed memory.
20238 The intrinsic function operates on a block of registers that has
20239 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20240 The function references the memory at EXP in mode MEM_MODE;
20241 this mode may be BLKmode if no more suitable mode is available. */
20243 static tree
20244 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20245 enum machine_mode reg_mode,
20246 neon_builtin_type_mode type_mode)
20248 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20249 tree elem_type, upper_bound, array_type;
20251 /* Work out the size of the register block in bytes. */
20252 reg_size = GET_MODE_SIZE (reg_mode);
20254 /* Work out the size of each vector in bytes. */
20255 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20256 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20258 /* Work out how many vectors there are. */
20259 gcc_assert (reg_size % vector_size == 0);
20260 nvectors = reg_size / vector_size;
20262 /* Work out how many elements are being loaded or stored.
20263 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20264 and memory elements; anything else implies a lane load or store. */
20265 if (mem_mode == reg_mode)
20266 nelems = vector_size * nvectors;
20267 else
20268 nelems = nvectors;
20270 /* Work out the type of each element. */
20271 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20272 elem_type = TREE_TYPE (TREE_TYPE (exp));
20274 /* Create a type that describes the full access. */
20275 upper_bound = build_int_cst (size_type_node, nelems - 1);
20276 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20278 /* Dereference EXP using that type. */
20279 return fold_build2 (MEM_REF, array_type, exp,
20280 build_int_cst (build_pointer_type (array_type), 0));
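/* Worked example, a sketch rather than part of the original sources: for a
   two-register structure load of V8QImode vectors (TYPE_MODE a D-register
   type, REG_MODE covering 16 bytes), reg_size is 16, vector_size is 8 and
   nvectors is 2.  A full load has MEM_MODE == REG_MODE, so nelems is
   8 * 2 = 16 and the MEM_REF describes an array of 16 QImode elements; the
   corresponding lane load has a different MEM_MODE and accesses only
   nelems = nvectors = 2 elements, one per vector.  */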
20283 /* Subroutine of arm_expand_neon_builtin; expand the arguments given by the trailing NEON_ARG_* codes and emit the instruction. */
20284 static rtx
20285 arm_expand_neon_args (rtx target, int icode, int have_retval,
20286 neon_builtin_type_mode type_mode,
20287 tree exp, ...)
20289 va_list ap;
20290 rtx pat;
20291 tree arg[NEON_MAX_BUILTIN_ARGS];
20292 rtx op[NEON_MAX_BUILTIN_ARGS];
20293 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20294 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20295 enum machine_mode other_mode;
20296 int argc = 0;
20297 int opno;
20299 if (have_retval
20300 && (!target
20301 || GET_MODE (target) != tmode
20302 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20303 target = gen_reg_rtx (tmode);
20305 va_start (ap, exp);
20307 for (;;)
20309 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20311 if (thisarg == NEON_ARG_STOP)
20312 break;
20313 else
20315 opno = argc + have_retval;
20316 mode[argc] = insn_data[icode].operand[opno].mode;
20317 arg[argc] = CALL_EXPR_ARG (exp, argc);
20318 if (thisarg == NEON_ARG_MEMORY)
20320 other_mode = insn_data[icode].operand[1 - opno].mode;
20321 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20322 other_mode, type_mode);
20324 op[argc] = expand_normal (arg[argc]);
20326 switch (thisarg)
20328 case NEON_ARG_COPY_TO_REG:
20329 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20330 if (!(*insn_data[icode].operand[opno].predicate)
20331 (op[argc], mode[argc]))
20332 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20333 break;
20335 case NEON_ARG_CONSTANT:
20336 /* FIXME: This error message is somewhat unhelpful. */
20337 if (!(*insn_data[icode].operand[opno].predicate)
20338 (op[argc], mode[argc]))
20339 error ("argument must be a constant");
20340 break;
20342 case NEON_ARG_MEMORY:
20343 gcc_assert (MEM_P (op[argc]));
20344 PUT_MODE (op[argc], mode[argc]);
20345 /* ??? arm_neon.h uses the same built-in functions for signed
20346 and unsigned accesses, casting where necessary. This isn't
20347 alias safe. */
20348 set_mem_alias_set (op[argc], 0);
20349 if (!(*insn_data[icode].operand[opno].predicate)
20350 (op[argc], mode[argc]))
20351 op[argc] = (replace_equiv_address
20352 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20353 break;
20355 case NEON_ARG_STOP:
20356 gcc_unreachable ();
20359 argc++;
20363 va_end (ap);
20365 if (have_retval)
20366 switch (argc)
20368 case 1:
20369 pat = GEN_FCN (icode) (target, op[0]);
20370 break;
20372 case 2:
20373 pat = GEN_FCN (icode) (target, op[0], op[1]);
20374 break;
20376 case 3:
20377 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20378 break;
20380 case 4:
20381 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20382 break;
20384 case 5:
20385 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20386 break;
20388 default:
20389 gcc_unreachable ();
20391 else
20392 switch (argc)
20394 case 1:
20395 pat = GEN_FCN (icode) (op[0]);
20396 break;
20398 case 2:
20399 pat = GEN_FCN (icode) (op[0], op[1]);
20400 break;
20402 case 3:
20403 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20404 break;
20406 case 4:
20407 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20408 break;
20410 case 5:
20411 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20412 break;
20414 default:
20415 gcc_unreachable ();
20418 if (!pat)
20419 return 0;
20421 emit_insn (pat);
20423 return target;
20426 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20427 constants defined per-instruction or per instruction-variant. Instead, the
20428 required info is looked up in the table neon_builtin_data. */
20429 static rtx
20430 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20432 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20433 neon_itype itype = d->itype;
20434 enum insn_code icode = d->code;
20435 neon_builtin_type_mode type_mode = d->mode;
20437 switch (itype)
20439 case NEON_UNOP:
20440 case NEON_CONVERT:
20441 case NEON_DUPLANE:
20442 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20443 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20445 case NEON_BINOP:
20446 case NEON_SETLANE:
20447 case NEON_SCALARMUL:
20448 case NEON_SCALARMULL:
20449 case NEON_SCALARMULH:
20450 case NEON_SHIFTINSERT:
20451 case NEON_LOGICBINOP:
20452 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20453 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20454 NEON_ARG_STOP);
20456 case NEON_TERNOP:
20457 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20458 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20459 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20461 case NEON_GETLANE:
20462 case NEON_FIXCONV:
20463 case NEON_SHIFTIMM:
20464 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20465 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20466 NEON_ARG_STOP);
20468 case NEON_CREATE:
20469 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20470 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20472 case NEON_DUP:
20473 case NEON_SPLIT:
20474 case NEON_REINTERP:
20475 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20476 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20478 case NEON_COMBINE:
20479 case NEON_VTBL:
20480 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20481 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20483 case NEON_RESULTPAIR:
20484 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20485 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20486 NEON_ARG_STOP);
20488 case NEON_LANEMUL:
20489 case NEON_LANEMULL:
20490 case NEON_LANEMULH:
20491 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20492 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20493 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20495 case NEON_LANEMAC:
20496 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20497 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20498 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20500 case NEON_SHIFTACC:
20501 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20502 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20503 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20505 case NEON_SCALARMAC:
20506 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20507 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20508 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20510 case NEON_SELECT:
20511 case NEON_VTBX:
20512 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20513 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20514 NEON_ARG_STOP);
20516 case NEON_LOAD1:
20517 case NEON_LOADSTRUCT:
20518 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20519 NEON_ARG_MEMORY, NEON_ARG_STOP);
20521 case NEON_LOAD1LANE:
20522 case NEON_LOADSTRUCTLANE:
20523 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20524 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20525 NEON_ARG_STOP);
20527 case NEON_STORE1:
20528 case NEON_STORESTRUCT:
20529 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20530 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20532 case NEON_STORE1LANE:
20533 case NEON_STORESTRUCTLANE:
20534 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20535 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20536 NEON_ARG_STOP);
20539 gcc_unreachable ();
20542 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20543 void
20544 neon_reinterpret (rtx dest, rtx src)
20546 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20549 /* Emit code to place a Neon pair result in memory locations (with equal
20550 registers). */
20551 void
20552 neon_emit_pair_result_insn (enum machine_mode mode,
20553 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20554 rtx op1, rtx op2)
20556 rtx mem = gen_rtx_MEM (mode, destaddr);
20557 rtx tmp1 = gen_reg_rtx (mode);
20558 rtx tmp2 = gen_reg_rtx (mode);
20560 emit_insn (intfn (tmp1, op1, op2, tmp2));
20562 emit_move_insn (mem, tmp1);
20563 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20564 emit_move_insn (mem, tmp2);
20567 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20568 not to early-clobber SRC registers in the process.
20570 We assume that the operands described by SRC and DEST represent a
20571 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20572 number of components into which the copy has been decomposed. */
20573 void
20574 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20576 unsigned int i;
20578 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20579 || REGNO (operands[0]) < REGNO (operands[1]))
20581 for (i = 0; i < count; i++)
20583 operands[2 * i] = dest[i];
20584 operands[2 * i + 1] = src[i];
20587 else
20589 for (i = 0; i < count; i++)
20591 operands[2 * i] = dest[count - i - 1];
20592 operands[2 * i + 1] = src[count - i - 1];
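/* Worked example, a sketch rather than part of the original sources: with
   count == 2, a non-overlapping copy (or one whose destination starts at a
   lower register number) is laid out in forward order,

       operands[0] = dest[0]; operands[1] = src[0];
       operands[2] = dest[1]; operands[3] = src[1];

   whereas an overlapping copy whose destination starts higher is laid out
   in reverse (dest[1]/src[1] first), so that no component move clobbers a
   source register that a later component move still has to read.  */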
20597 /* Split operands into moves from op[1] + op[2] into op[0]. */
20599 void
20600 neon_split_vcombine (rtx operands[3])
20602 unsigned int dest = REGNO (operands[0]);
20603 unsigned int src1 = REGNO (operands[1]);
20604 unsigned int src2 = REGNO (operands[2]);
20605 enum machine_mode halfmode = GET_MODE (operands[1]);
20606 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20607 rtx destlo, desthi;
20609 if (src1 == dest && src2 == dest + halfregs)
20611 /* No-op move. Can't split to nothing; emit something. */
20612 emit_note (NOTE_INSN_DELETED);
20613 return;
20616 /* Preserve register attributes for variable tracking. */
20617 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20618 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20619 GET_MODE_SIZE (halfmode));
20621 /* Special case of reversed high/low parts. Use VSWP. */
20622 if (src2 == dest && src1 == dest + halfregs)
20624 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20625 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20626 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20627 return;
20630 if (!reg_overlap_mentioned_p (operands[2], destlo))
20632 /* Try to avoid unnecessary moves if part of the result
20633 is in the right place already. */
20634 if (src1 != dest)
20635 emit_move_insn (destlo, operands[1]);
20636 if (src2 != dest + halfregs)
20637 emit_move_insn (desthi, operands[2]);
20639 else
20641 if (src2 != dest + halfregs)
20642 emit_move_insn (desthi, operands[2]);
20643 if (src1 != dest)
20644 emit_move_insn (destlo, operands[1]);
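/* Worked example, a sketch rather than part of the original sources: for a
   vcombine whose destination is the register pair d0/d1, the cases above
   are, in order: operands[1] already in d0 and operands[2] in d1 emits only
   a deleted-insn note; operands[1] in d1 and operands[2] in d0 emits the
   single parallel set (a VSWP of d0 and d1); otherwise the two half moves
   are emitted, ordered so that neither move overwrites a source half that
   the other move still needs.  */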
20648 /* Expand an expression EXP that calls a built-in function,
20649 with result going to TARGET if that's convenient
20650 (and in mode MODE if that's convenient).
20651 SUBTARGET may be used as the target for computing one of EXP's operands.
20652 IGNORE is nonzero if the value is to be ignored. */
20654 static rtx
20655 arm_expand_builtin (tree exp,
20656 rtx target,
20657 rtx subtarget ATTRIBUTE_UNUSED,
20658 enum machine_mode mode ATTRIBUTE_UNUSED,
20659 int ignore ATTRIBUTE_UNUSED)
20661 const struct builtin_description * d;
20662 enum insn_code icode;
20663 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20664 tree arg0;
20665 tree arg1;
20666 tree arg2;
20667 rtx op0;
20668 rtx op1;
20669 rtx op2;
20670 rtx pat;
20671 int fcode = DECL_FUNCTION_CODE (fndecl);
20672 size_t i;
20673 enum machine_mode tmode;
20674 enum machine_mode mode0;
20675 enum machine_mode mode1;
20676 enum machine_mode mode2;
20677 int opint;
20678 int selector;
20679 int mask;
20680 int imm;
20682 if (fcode >= ARM_BUILTIN_NEON_BASE)
20683 return arm_expand_neon_builtin (fcode, exp, target);
20685 switch (fcode)
20687 case ARM_BUILTIN_TEXTRMSB:
20688 case ARM_BUILTIN_TEXTRMUB:
20689 case ARM_BUILTIN_TEXTRMSH:
20690 case ARM_BUILTIN_TEXTRMUH:
20691 case ARM_BUILTIN_TEXTRMSW:
20692 case ARM_BUILTIN_TEXTRMUW:
20693 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20694 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20695 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20696 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20697 : CODE_FOR_iwmmxt_textrmw);
20699 arg0 = CALL_EXPR_ARG (exp, 0);
20700 arg1 = CALL_EXPR_ARG (exp, 1);
20701 op0 = expand_normal (arg0);
20702 op1 = expand_normal (arg1);
20703 tmode = insn_data[icode].operand[0].mode;
20704 mode0 = insn_data[icode].operand[1].mode;
20705 mode1 = insn_data[icode].operand[2].mode;
20707 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20708 op0 = copy_to_mode_reg (mode0, op0);
20709 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20711 /* @@@ better error message */
20712 error ("selector must be an immediate");
20713 return gen_reg_rtx (tmode);
20716 opint = INTVAL (op1);
20717 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20719 if (opint > 7 || opint < 0)
20720 error ("the range of selector should be in 0 to 7");
20722 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20724 if (opint > 3 || opint < 0)
20725 error ("the range of selector should be in 0 to 3");
20727 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20729 if (opint > 1 || opint < 0)
20730 error ("the range of selector should be in 0 to 1");
20733 if (target == 0
20734 || GET_MODE (target) != tmode
20735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20736 target = gen_reg_rtx (tmode);
20737 pat = GEN_FCN (icode) (target, op0, op1);
20738 if (! pat)
20739 return 0;
20740 emit_insn (pat);
20741 return target;
20743 case ARM_BUILTIN_WALIGNI:
20744 /* If op2 is immediate, call waligni, else call walignr. */
20745 arg0 = CALL_EXPR_ARG (exp, 0);
20746 arg1 = CALL_EXPR_ARG (exp, 1);
20747 arg2 = CALL_EXPR_ARG (exp, 2);
20748 op0 = expand_normal (arg0);
20749 op1 = expand_normal (arg1);
20750 op2 = expand_normal (arg2);
20751 if (GET_CODE (op2) == CONST_INT)
20753 icode = CODE_FOR_iwmmxt_waligni;
20754 tmode = insn_data[icode].operand[0].mode;
20755 mode0 = insn_data[icode].operand[1].mode;
20756 mode1 = insn_data[icode].operand[2].mode;
20757 mode2 = insn_data[icode].operand[3].mode;
20758 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20759 op0 = copy_to_mode_reg (mode0, op0);
20760 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20761 op1 = copy_to_mode_reg (mode1, op1);
20762 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
20763 selector = INTVAL (op2);
20764 if (selector > 7 || selector < 0)
20765 error ("the range of selector should be in 0 to 7");
20767 else
20769 icode = CODE_FOR_iwmmxt_walignr;
20770 tmode = insn_data[icode].operand[0].mode;
20771 mode0 = insn_data[icode].operand[1].mode;
20772 mode1 = insn_data[icode].operand[2].mode;
20773 mode2 = insn_data[icode].operand[3].mode;
20774 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20775 op0 = copy_to_mode_reg (mode0, op0);
20776 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20777 op1 = copy_to_mode_reg (mode1, op1);
20778 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
20779 op2 = copy_to_mode_reg (mode2, op2);
20781 if (target == 0
20782 || GET_MODE (target) != tmode
20783 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20784 target = gen_reg_rtx (tmode);
20785 pat = GEN_FCN (icode) (target, op0, op1, op2);
20786 if (!pat)
20787 return 0;
20788 emit_insn (pat);
20789 return target;
20791 case ARM_BUILTIN_TINSRB:
20792 case ARM_BUILTIN_TINSRH:
20793 case ARM_BUILTIN_TINSRW:
20794 case ARM_BUILTIN_WMERGE:
20795 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20796 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20797 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
20798 : CODE_FOR_iwmmxt_tinsrw);
20799 arg0 = CALL_EXPR_ARG (exp, 0);
20800 arg1 = CALL_EXPR_ARG (exp, 1);
20801 arg2 = CALL_EXPR_ARG (exp, 2);
20802 op0 = expand_normal (arg0);
20803 op1 = expand_normal (arg1);
20804 op2 = expand_normal (arg2);
20805 tmode = insn_data[icode].operand[0].mode;
20806 mode0 = insn_data[icode].operand[1].mode;
20807 mode1 = insn_data[icode].operand[2].mode;
20808 mode2 = insn_data[icode].operand[3].mode;
20810 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20811 op0 = copy_to_mode_reg (mode0, op0);
20812 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20813 op1 = copy_to_mode_reg (mode1, op1);
20814 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20816 error ("selector must be an immediate");
20817 return const0_rtx;
20819 if (icode == CODE_FOR_iwmmxt_wmerge)
20821 selector = INTVAL (op2);
20822 if (selector > 7 || selector < 0)
20823 error ("the range of selector should be in 0 to 7");
20825 if ((icode == CODE_FOR_iwmmxt_tinsrb)
20826 || (icode == CODE_FOR_iwmmxt_tinsrh)
20827 || (icode == CODE_FOR_iwmmxt_tinsrw))
20829 mask = 0x01;
20830 selector = INTVAL (op2);
20831 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
20832 error ("the selector must be in the range 0 to 7");
20833 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
20834 error ("the selector must be in the range 0 to 3");
20835 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
20836 error ("the selector must be in the range 0 to 1");
20837 mask <<= selector;
20838 op2 = gen_rtx_CONST_INT (SImode, mask);
20840 if (target == 0
20841 || GET_MODE (target) != tmode
20842 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20843 target = gen_reg_rtx (tmode);
20844 pat = GEN_FCN (icode) (target, op0, op1, op2);
20845 if (! pat)
20846 return 0;
20847 emit_insn (pat);
20848 return target;
20850 case ARM_BUILTIN_SETWCGR0:
20851 case ARM_BUILTIN_SETWCGR1:
20852 case ARM_BUILTIN_SETWCGR2:
20853 case ARM_BUILTIN_SETWCGR3:
20854 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
20855 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
20856 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
20857 : CODE_FOR_iwmmxt_setwcgr3);
20858 arg0 = CALL_EXPR_ARG (exp, 0);
20859 op0 = expand_normal (arg0);
20860 mode0 = insn_data[icode].operand[0].mode;
20861 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
20862 op0 = copy_to_mode_reg (mode0, op0);
20863 pat = GEN_FCN (icode) (op0);
20864 if (!pat)
20865 return 0;
20866 emit_insn (pat);
20867 return 0;
20869 case ARM_BUILTIN_GETWCGR0:
20870 case ARM_BUILTIN_GETWCGR1:
20871 case ARM_BUILTIN_GETWCGR2:
20872 case ARM_BUILTIN_GETWCGR3:
20873 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
20874 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
20875 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
20876 : CODE_FOR_iwmmxt_getwcgr3);
20877 tmode = insn_data[icode].operand[0].mode;
20878 if (target == 0
20879 || GET_MODE (target) != tmode
20880 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20881 target = gen_reg_rtx (tmode);
20882 pat = GEN_FCN (icode) (target);
20883 if (!pat)
20884 return 0;
20885 emit_insn (pat);
20886 return target;
20888 case ARM_BUILTIN_WSHUFH:
20889 icode = CODE_FOR_iwmmxt_wshufh;
20890 arg0 = CALL_EXPR_ARG (exp, 0);
20891 arg1 = CALL_EXPR_ARG (exp, 1);
20892 op0 = expand_normal (arg0);
20893 op1 = expand_normal (arg1);
20894 tmode = insn_data[icode].operand[0].mode;
20895 mode1 = insn_data[icode].operand[1].mode;
20896 mode2 = insn_data[icode].operand[2].mode;
20898 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20899 op0 = copy_to_mode_reg (mode1, op0);
20900 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20902 error ("mask must be an immediate");
20903 return const0_rtx;
20905 selector = INTVAL (op1);
20906 if (selector < 0 || selector > 255)
20907 error ("the range of mask should be in 0 to 255");
20908 if (target == 0
20909 || GET_MODE (target) != tmode
20910 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20911 target = gen_reg_rtx (tmode);
20912 pat = GEN_FCN (icode) (target, op0, op1);
20913 if (! pat)
20914 return 0;
20915 emit_insn (pat);
20916 return target;
20918 case ARM_BUILTIN_WMADDS:
20919 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
20920 case ARM_BUILTIN_WMADDSX:
20921 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
20922 case ARM_BUILTIN_WMADDSN:
20923 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
20924 case ARM_BUILTIN_WMADDU:
20925 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
20926 case ARM_BUILTIN_WMADDUX:
20927 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
20928 case ARM_BUILTIN_WMADDUN:
20929 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
20930 case ARM_BUILTIN_WSADBZ:
20931 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20932 case ARM_BUILTIN_WSADHZ:
20933 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20935 /* Several three-argument builtins. */
20936 case ARM_BUILTIN_WMACS:
20937 case ARM_BUILTIN_WMACU:
20938 case ARM_BUILTIN_TMIA:
20939 case ARM_BUILTIN_TMIAPH:
20940 case ARM_BUILTIN_TMIATT:
20941 case ARM_BUILTIN_TMIATB:
20942 case ARM_BUILTIN_TMIABT:
20943 case ARM_BUILTIN_TMIABB:
20944 case ARM_BUILTIN_WQMIABB:
20945 case ARM_BUILTIN_WQMIABT:
20946 case ARM_BUILTIN_WQMIATB:
20947 case ARM_BUILTIN_WQMIATT:
20948 case ARM_BUILTIN_WQMIABBN:
20949 case ARM_BUILTIN_WQMIABTN:
20950 case ARM_BUILTIN_WQMIATBN:
20951 case ARM_BUILTIN_WQMIATTN:
20952 case ARM_BUILTIN_WMIABB:
20953 case ARM_BUILTIN_WMIABT:
20954 case ARM_BUILTIN_WMIATB:
20955 case ARM_BUILTIN_WMIATT:
20956 case ARM_BUILTIN_WMIABBN:
20957 case ARM_BUILTIN_WMIABTN:
20958 case ARM_BUILTIN_WMIATBN:
20959 case ARM_BUILTIN_WMIATTN:
20960 case ARM_BUILTIN_WMIAWBB:
20961 case ARM_BUILTIN_WMIAWBT:
20962 case ARM_BUILTIN_WMIAWTB:
20963 case ARM_BUILTIN_WMIAWTT:
20964 case ARM_BUILTIN_WMIAWBBN:
20965 case ARM_BUILTIN_WMIAWBTN:
20966 case ARM_BUILTIN_WMIAWTBN:
20967 case ARM_BUILTIN_WMIAWTTN:
20968 case ARM_BUILTIN_WSADB:
20969 case ARM_BUILTIN_WSADH:
20970 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20971 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20972 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20973 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20974 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20975 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20976 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20977 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20978 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
20979 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
20980 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
20981 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
20982 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
20983 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
20984 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
20985 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
20986 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
20987 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
20988 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
20989 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
20990 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
20991 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
20992 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
20993 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
20994 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
20995 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
20996 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
20997 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
20998 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
20999 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21000 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21001 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21002 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21003 : CODE_FOR_iwmmxt_wsadh);
21004 arg0 = CALL_EXPR_ARG (exp, 0);
21005 arg1 = CALL_EXPR_ARG (exp, 1);
21006 arg2 = CALL_EXPR_ARG (exp, 2);
21007 op0 = expand_normal (arg0);
21008 op1 = expand_normal (arg1);
21009 op2 = expand_normal (arg2);
21010 tmode = insn_data[icode].operand[0].mode;
21011 mode0 = insn_data[icode].operand[1].mode;
21012 mode1 = insn_data[icode].operand[2].mode;
21013 mode2 = insn_data[icode].operand[3].mode;
21015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21016 op0 = copy_to_mode_reg (mode0, op0);
21017 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21018 op1 = copy_to_mode_reg (mode1, op1);
21019 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21020 op2 = copy_to_mode_reg (mode2, op2);
21021 if (target == 0
21022 || GET_MODE (target) != tmode
21023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21024 target = gen_reg_rtx (tmode);
21025 pat = GEN_FCN (icode) (target, op0, op1, op2);
21026 if (! pat)
21027 return 0;
21028 emit_insn (pat);
21029 return target;
21031 case ARM_BUILTIN_WZERO:
21032 target = gen_reg_rtx (DImode);
21033 emit_insn (gen_iwmmxt_clrdi (target));
21034 return target;
21036 case ARM_BUILTIN_WSRLHI:
21037 case ARM_BUILTIN_WSRLWI:
21038 case ARM_BUILTIN_WSRLDI:
21039 case ARM_BUILTIN_WSLLHI:
21040 case ARM_BUILTIN_WSLLWI:
21041 case ARM_BUILTIN_WSLLDI:
21042 case ARM_BUILTIN_WSRAHI:
21043 case ARM_BUILTIN_WSRAWI:
21044 case ARM_BUILTIN_WSRADI:
21045 case ARM_BUILTIN_WRORHI:
21046 case ARM_BUILTIN_WRORWI:
21047 case ARM_BUILTIN_WRORDI:
21048 case ARM_BUILTIN_WSRLH:
21049 case ARM_BUILTIN_WSRLW:
21050 case ARM_BUILTIN_WSRLD:
21051 case ARM_BUILTIN_WSLLH:
21052 case ARM_BUILTIN_WSLLW:
21053 case ARM_BUILTIN_WSLLD:
21054 case ARM_BUILTIN_WSRAH:
21055 case ARM_BUILTIN_WSRAW:
21056 case ARM_BUILTIN_WSRAD:
21057 case ARM_BUILTIN_WRORH:
21058 case ARM_BUILTIN_WRORW:
21059 case ARM_BUILTIN_WRORD:
21060 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21061 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21062 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21063 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21064 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21065 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21066 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21067 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21068 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21069 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21070 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21071 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21072 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21073 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21074 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21075 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21076 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21077 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21078 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21079 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21080 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21081 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21082 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21083 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21084 : CODE_FOR_nothing);
21085 arg1 = CALL_EXPR_ARG (exp, 1);
21086 op1 = expand_normal (arg1);
21087 if (GET_MODE (op1) == VOIDmode)
21089 imm = INTVAL (op1);
21090 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21091 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21092 && (imm < 0 || imm > 32))
21094 if (fcode == ARM_BUILTIN_WRORHI)
21095 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21096 else if (fcode == ARM_BUILTIN_WRORWI)
21097 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21098 else if (fcode == ARM_BUILTIN_WRORH)
21099 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21100 else
21101 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21103 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21104 && (imm < 0 || imm > 64))
21106 if (fcode == ARM_BUILTIN_WRORDI)
21107 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21108 else
21109 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21111 else if (imm < 0)
21113 if (fcode == ARM_BUILTIN_WSRLHI)
21114 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21115 else if (fcode == ARM_BUILTIN_WSRLWI)
21116 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21117 else if (fcode == ARM_BUILTIN_WSRLDI)
21118 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21119 else if (fcode == ARM_BUILTIN_WSLLHI)
21120 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21121 else if (fcode == ARM_BUILTIN_WSLLWI)
21122 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21123 else if (fcode == ARM_BUILTIN_WSLLDI)
21124 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21125 else if (fcode == ARM_BUILTIN_WSRAHI)
21126 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21127 else if (fcode == ARM_BUILTIN_WSRAWI)
21128 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21129 else if (fcode == ARM_BUILTIN_WSRADI)
21130 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21131 else if (fcode == ARM_BUILTIN_WSRLH)
21132 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21133 else if (fcode == ARM_BUILTIN_WSRLW)
21134 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21135 else if (fcode == ARM_BUILTIN_WSRLD)
21136 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21137 else if (fcode == ARM_BUILTIN_WSLLH)
21138 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21139 else if (fcode == ARM_BUILTIN_WSLLW)
21140 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21141 else if (fcode == ARM_BUILTIN_WSLLD)
21142 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21143 else if (fcode == ARM_BUILTIN_WSRAH)
21144 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21145 else if (fcode == ARM_BUILTIN_WSRAW)
21146 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21147 else
21148 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21151 return arm_expand_binop_builtin (icode, exp, target);
21153 case ARM_BUILTIN_THREAD_POINTER:
21154 return arm_load_tp (target);
21156 default:
21157 break;
21160 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21161 if (d->code == (const enum arm_builtins) fcode)
21162 return arm_expand_binop_builtin (d->icode, exp, target);
21164 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21165 if (d->code == (const enum arm_builtins) fcode)
21166 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21168 /* @@@ Should really do something sensible here. */
21169 return NULL_RTX;
21172 /* Return the number (counting from 0) of
21173 the least significant set bit in MASK. */
21175 inline static int
21176 number_of_first_bit_set (unsigned mask)
21178 return ctz_hwi (mask);
21181 /* Like emit_multi_reg_push, but allowing for a different set of
21182 registers to be described as saved. MASK is the set of registers
21183 to be saved; REAL_REGS is the set of registers to be described as
21184 saved. If REAL_REGS is 0, only describe the stack adjustment. */
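/* Illustrative example (not from the original source): with
   MASK = {r4, lr} and REAL_REGS = {r8, lr}, a single "push {r4, lr}"
   is emitted, but the unwind information describes the two slots as
   holding r8 and lr -- the situation that arises when a high register
   has been copied into a low register solely so it can be pushed.  */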
21186 static rtx
21187 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21189 unsigned long regno;
21190 rtx par[10], tmp, reg, insn;
21191 int i, j;
21193 /* Build the parallel of the registers actually being stored. */
21194 for (i = 0; mask; ++i, mask &= mask - 1)
21196 regno = ctz_hwi (mask);
21197 reg = gen_rtx_REG (SImode, regno);
21199 if (i == 0)
21200 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21201 else
21202 tmp = gen_rtx_USE (VOIDmode, reg);
21204 par[i] = tmp;
21207 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21208 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21209 tmp = gen_frame_mem (BLKmode, tmp);
21210 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21211 par[0] = tmp;
21213 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21214 insn = emit_insn (tmp);
21216 /* Always build the stack adjustment note for unwind info. */
21217 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21218 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21219 par[0] = tmp;
21221 /* Build the parallel of the registers recorded as saved for unwind. */
21222 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21224 regno = ctz_hwi (real_regs);
21225 reg = gen_rtx_REG (SImode, regno);
21227 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21228 tmp = gen_frame_mem (SImode, tmp);
21229 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21230 RTX_FRAME_RELATED_P (tmp) = 1;
21231 par[j + 1] = tmp;
21234 if (j == 0)
21235 tmp = par[0];
21236 else
21238 RTX_FRAME_RELATED_P (par[0]) = 1;
21239 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21242 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21244 return insn;
21247 /* Emit code to push or pop registers to or from the stack. F is the
21248 assembly file. MASK is the registers to pop. */
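/* For illustration (an assumption, not in the original): a MASK of
   0x0090 (bits 4 and 7 set) produces "pop {r4, r7}", while a MASK
   containing the PC bit ends the function, either directly with
   "pop {..., pc}" or via thumb_exit when interworking, a backtrace
   structure or an EH return is involved.  */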
21249 static void
21250 thumb_pop (FILE *f, unsigned long mask)
21252 int regno;
21253 int lo_mask = mask & 0xFF;
21254 int pushed_words = 0;
21256 gcc_assert (mask);
21258 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21260 /* Special case. Do not generate a POP PC statement here; do it in
21261 thumb_exit (). */
21262 thumb_exit (f, -1);
21263 return;
21266 fprintf (f, "\tpop\t{");
21268 /* Look at the low registers first. */
21269 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21271 if (lo_mask & 1)
21273 asm_fprintf (f, "%r", regno);
21275 if ((lo_mask & ~1) != 0)
21276 fprintf (f, ", ");
21278 pushed_words++;
21282 if (mask & (1 << PC_REGNUM))
21284 /* Catch popping the PC. */
21285 if (TARGET_INTERWORK || TARGET_BACKTRACE
21286 || crtl->calls_eh_return)
21288 /* The PC is never popped directly; instead
21289 it is popped into r3 and then BX is used. */
21290 fprintf (f, "}\n");
21292 thumb_exit (f, -1);
21294 return;
21296 else
21298 if (mask & 0xFF)
21299 fprintf (f, ", ");
21301 asm_fprintf (f, "%r", PC_REGNUM);
21305 fprintf (f, "}\n");
21308 /* Generate code to return from a thumb function.
21309 If 'reg_containing_return_addr' is -1, then the return address is
21310 actually on the stack, at the stack pointer. */
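/* Rough sketch of the output (illustrative, not from the original
   source): in the simplest case, with the return address on the stack
   and no interworking or backtrace requirements, this degenerates to a
   single "pop {pc}".  When interworking is needed the return address is
   instead popped into a free argument register and the function ends
   with "bx <reg>".  */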
21311 static void
21312 thumb_exit (FILE *f, int reg_containing_return_addr)
21314 unsigned regs_available_for_popping;
21315 unsigned regs_to_pop;
21316 int pops_needed;
21317 unsigned available;
21318 unsigned required;
21319 int mode;
21320 int size;
21321 int restore_a4 = FALSE;
21323 /* Compute the registers we need to pop. */
21324 regs_to_pop = 0;
21325 pops_needed = 0;
21327 if (reg_containing_return_addr == -1)
21329 regs_to_pop |= 1 << LR_REGNUM;
21330 ++pops_needed;
21333 if (TARGET_BACKTRACE)
21335 /* Restore the (ARM) frame pointer and stack pointer. */
21336 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21337 pops_needed += 2;
21340 /* If there is nothing to pop then just emit the BX instruction and
21341 return. */
21342 if (pops_needed == 0)
21344 if (crtl->calls_eh_return)
21345 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21347 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21348 return;
21350 /* Otherwise if we are not supporting interworking and we have not created
21351 a backtrace structure and the function was not entered in ARM mode then
21352 just pop the return address straight into the PC. */
21353 else if (!TARGET_INTERWORK
21354 && !TARGET_BACKTRACE
21355 && !is_called_in_ARM_mode (current_function_decl)
21356 && !crtl->calls_eh_return)
21358 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21359 return;
21362 /* Find out how many of the (return) argument registers we can corrupt. */
21363 regs_available_for_popping = 0;
21365 /* If returning via __builtin_eh_return, the bottom three registers
21366 all contain information needed for the return. */
21367 if (crtl->calls_eh_return)
21368 size = 12;
21369 else
21371 /* Deduce the registers used from the function's
21372 return value. This is more reliable than examining
21373 df_regs_ever_live_p () because that will be set if the register is
21374 ever used in the function, not just if the register is used
21375 to hold a return value. */
21377 if (crtl->return_rtx != 0)
21378 mode = GET_MODE (crtl->return_rtx);
21379 else
21380 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21382 size = GET_MODE_SIZE (mode);
21384 if (size == 0)
21386 /* In a void function we can use any argument register.
21387 In a function that returns a structure on the stack
21388 we can use the second and third argument registers. */
21389 if (mode == VOIDmode)
21390 regs_available_for_popping =
21391 (1 << ARG_REGISTER (1))
21392 | (1 << ARG_REGISTER (2))
21393 | (1 << ARG_REGISTER (3));
21394 else
21395 regs_available_for_popping =
21396 (1 << ARG_REGISTER (2))
21397 | (1 << ARG_REGISTER (3));
21399 else if (size <= 4)
21400 regs_available_for_popping =
21401 (1 << ARG_REGISTER (2))
21402 | (1 << ARG_REGISTER (3));
21403 else if (size <= 8)
21404 regs_available_for_popping =
21405 (1 << ARG_REGISTER (3));
21408 /* Match registers to be popped with registers into which we pop them. */
21409 for (available = regs_available_for_popping,
21410 required = regs_to_pop;
21411 required != 0 && available != 0;
21412 available &= ~(available & - available),
21413 required &= ~(required & - required))
21414 -- pops_needed;
21416 /* If we have any popping registers left over, remove them. */
21417 if (available > 0)
21418 regs_available_for_popping &= ~available;
21420 /* Otherwise if we need another popping register we can use
21421 the fourth argument register. */
21422 else if (pops_needed)
21424 /* If we have not found any free argument registers and
21425 reg a4 contains the return address, we must move it. */
21426 if (regs_available_for_popping == 0
21427 && reg_containing_return_addr == LAST_ARG_REGNUM)
21429 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21430 reg_containing_return_addr = LR_REGNUM;
21432 else if (size > 12)
21434 /* Register a4 is being used to hold part of the return value,
21435 but we have dire need of a free, low register. */
21436 restore_a4 = TRUE;
21438 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21441 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21443 /* The fourth argument register is available. */
21444 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21446 --pops_needed;
21450 /* Pop as many registers as we can. */
21451 thumb_pop (f, regs_available_for_popping);
21453 /* Process the registers we popped. */
21454 if (reg_containing_return_addr == -1)
21456 /* The return address was popped into the lowest numbered register. */
21457 regs_to_pop &= ~(1 << LR_REGNUM);
21459 reg_containing_return_addr =
21460 number_of_first_bit_set (regs_available_for_popping);
21462 /* Remove this register from the mask of available registers, so that
21463 the return address will not be corrupted by further pops. */
21464 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21467 /* If we popped other registers then handle them here. */
21468 if (regs_available_for_popping)
21470 int frame_pointer;
21472 /* Work out which register currently contains the frame pointer. */
21473 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21475 /* Move it into the correct place. */
21476 asm_fprintf (f, "\tmov\t%r, %r\n",
21477 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21479 /* (Temporarily) remove it from the mask of popped registers. */
21480 regs_available_for_popping &= ~(1 << frame_pointer);
21481 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21483 if (regs_available_for_popping)
21485 int stack_pointer;
21487 /* We popped the stack pointer as well,
21488 find the register that contains it. */
21489 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21491 /* Move it into the stack register. */
21492 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21494 /* At this point we have popped all necessary registers, so
21495 do not worry about restoring regs_available_for_popping
21496 to its correct value:
21498 assert (pops_needed == 0)
21499 assert (regs_available_for_popping == (1 << frame_pointer))
21500 assert (regs_to_pop == (1 << STACK_POINTER)) */
21502 else
21504 /* Since we have just moved the popped value into the frame
21505 pointer, the popping register is available for reuse, and
21506 we know that we still have the stack pointer left to pop. */
21507 regs_available_for_popping |= (1 << frame_pointer);
21511 /* If we still have registers left on the stack, but we no longer have
21512 any registers into which we can pop them, then we must move the return
21513 address into the link register and make available the register that
21514 contained it. */
21515 if (regs_available_for_popping == 0 && pops_needed > 0)
21517 regs_available_for_popping |= 1 << reg_containing_return_addr;
21519 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21520 reg_containing_return_addr);
21522 reg_containing_return_addr = LR_REGNUM;
21525 /* If we have registers left on the stack then pop some more.
21526 We know that at most we will want to pop FP and SP. */
21527 if (pops_needed > 0)
21529 int popped_into;
21530 int move_to;
21532 thumb_pop (f, regs_available_for_popping);
21534 /* We have popped either FP or SP.
21535 Move whichever one it is into the correct register. */
21536 popped_into = number_of_first_bit_set (regs_available_for_popping);
21537 move_to = number_of_first_bit_set (regs_to_pop);
21539 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21541 regs_to_pop &= ~(1 << move_to);
21543 --pops_needed;
21546 /* If we still have not popped everything then we must have only
21547 had one register available to us and we are now popping the SP. */
21548 if (pops_needed > 0)
21550 int popped_into;
21552 thumb_pop (f, regs_available_for_popping);
21554 popped_into = number_of_first_bit_set (regs_available_for_popping);
21556 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21558 /* assert (regs_to_pop == (1 << STACK_POINTER))
21559 assert (pops_needed == 1) */
21563 /* If necessary restore the a4 register. */
21564 if (restore_a4)
21566 if (reg_containing_return_addr != LR_REGNUM)
21568 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21569 reg_containing_return_addr = LR_REGNUM;
21572 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21575 if (crtl->calls_eh_return)
21576 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21578 /* Return to caller. */
21579 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21582 /* Scan INSN just before assembler is output for it.
21583 For Thumb-1, we track the status of the condition codes; this
21584 information is used in the cbranchsi4_insn pattern. */
21585 void
21586 thumb1_final_prescan_insn (rtx insn)
21588 if (flag_print_asm_name)
21589 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21590 INSN_ADDRESSES (INSN_UID (insn)));
21591 /* Don't overwrite the previous setter when we get to a cbranch. */
21592 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21594 enum attr_conds conds;
21596 if (cfun->machine->thumb1_cc_insn)
21598 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21599 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21600 CC_STATUS_INIT;
21602 conds = get_attr_conds (insn);
21603 if (conds == CONDS_SET)
21605 rtx set = single_set (insn);
21606 cfun->machine->thumb1_cc_insn = insn;
21607 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21608 cfun->machine->thumb1_cc_op1 = const0_rtx;
21609 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21610 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21612 rtx src1 = XEXP (SET_SRC (set), 1);
21613 if (src1 == const0_rtx)
21614 cfun->machine->thumb1_cc_mode = CCmode;
21617 else if (conds != CONDS_NOCOND)
21618 cfun->machine->thumb1_cc_insn = NULL_RTX;
21623 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21625 unsigned HOST_WIDE_INT mask = 0xff;
21626 int i;
21628 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21629 if (val == 0) /* XXX */
21630 return 0;
21632 for (i = 0; i < 25; i++)
21633 if ((val & (mask << i)) == val)
21634 return 1;
21636 return 0;
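/* Worked example (illustrative): 0x00ff0000 is accepted because it
   equals 0xff << 16; 0x00ff00ff is rejected because no single 8-bit
   value shifted by a constant can cover both set bytes.  */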
21639 /* Returns nonzero if the current function contains,
21640 or might contain a far jump. */
21641 static int
21642 thumb_far_jump_used_p (void)
21644 rtx insn;
21646 /* This test is only important for leaf functions. */
21647 /* assert (!leaf_function_p ()); */
21649 /* If we have already decided that far jumps may be used,
21650 do not bother checking again, and always return true even if
21651 it turns out that they are not being used. Once we have made
21652 the decision that far jumps are present (and that hence the link
21653 register will be pushed onto the stack) we cannot go back on it. */
21654 if (cfun->machine->far_jump_used)
21655 return 1;
21657 /* If this function is not being called from the prologue/epilogue
21658 generation code then it must be being called from the
21659 INITIAL_ELIMINATION_OFFSET macro. */
21660 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21662 /* In this case we know that we are being asked about the elimination
21663 of the arg pointer register. If that register is not being used,
21664 then there are no arguments on the stack, and we do not have to
21665 worry that a far jump might force the prologue to push the link
21666 register, changing the stack offsets. In this case we can just
21667 return false, since the presence of far jumps in the function will
21668 not affect stack offsets.
21670 If the arg pointer is live (or if it was live, but has now been
21671 eliminated and so set to dead) then we do have to test to see if
21672 the function might contain a far jump. This test can lead to some
21673 false negatives, since before reload is completed, the length of
21674 branch instructions is not known, so gcc defaults to returning their
21675 longest length, which in turn sets the far jump attribute to true.
21677 A false negative will not result in bad code being generated, but it
21678 will result in a needless push and pop of the link register. We
21679 hope that this does not occur too often.
21681 If we need doubleword stack alignment this could affect the other
21682 elimination offsets so we can't risk getting it wrong. */
21683 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21684 cfun->machine->arg_pointer_live = 1;
21685 else if (!cfun->machine->arg_pointer_live)
21686 return 0;
21689 /* Check to see if the function contains a branch
21690 insn with the far jump attribute set. */
21691 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21693 if (GET_CODE (insn) == JUMP_INSN
21694 /* Ignore tablejump patterns. */
21695 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21696 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21697 && get_attr_far_jump (insn) == FAR_JUMP_YES
21700 /* Record the fact that we have decided that
21701 the function does use far jumps. */
21702 cfun->machine->far_jump_used = 1;
21703 return 1;
21707 return 0;
21710 /* Return nonzero if FUNC must be entered in ARM mode. */
21712 is_called_in_ARM_mode (tree func)
21714 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21716 /* Ignore the problem about functions whose address is taken. */
21717 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21718 return TRUE;
21720 #ifdef ARM_PE
21721 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21722 #else
21723 return FALSE;
21724 #endif
21727 /* Given the stack offsets and register mask in OFFSETS, decide how
21728 many additional registers to push instead of subtracting a constant
21729 from SP. For epilogues the principle is the same except we use pop.
21730 FOR_PROLOGUE indicates which we're generating. */
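/* Worked example (illustrative, assuming size optimization and at
   least two free low registers): with a 516-byte frame, returning 2
   lets the prologue push two extra registers (8 bytes), so the
   remaining adjustment is 508 bytes, which still fits the
   single-instruction "sub sp, #508" form; otherwise the constant
   would have to be loaded into a register first.  */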
21731 static int
21732 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21734 HOST_WIDE_INT amount;
21735 unsigned long live_regs_mask = offsets->saved_regs_mask;
21736 /* Extract a mask of the ones we can give to the Thumb's push/pop
21737 instruction. */
21738 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21739 /* Then count how many other high registers will need to be pushed. */
21740 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21741 int n_free, reg_base;
21743 if (!for_prologue && frame_pointer_needed)
21744 amount = offsets->locals_base - offsets->saved_regs;
21745 else
21746 amount = offsets->outgoing_args - offsets->saved_regs;
21748 /* If the stack frame size is 512 exactly, we can save one load
21749 instruction, which should make this a win even when optimizing
21750 for speed. */
21751 if (!optimize_size && amount != 512)
21752 return 0;
21754 /* Can't do this if there are high registers to push. */
21755 if (high_regs_pushed != 0)
21756 return 0;
21758 /* Shouldn't do it in the prologue if no registers would normally
21759 be pushed at all. In the epilogue, also allow it if we'll have
21760 a pop insn for the PC. */
21761 if (l_mask == 0
21762 && (for_prologue
21763 || TARGET_BACKTRACE
21764 || (live_regs_mask & 1 << LR_REGNUM) == 0
21765 || TARGET_INTERWORK
21766 || crtl->args.pretend_args_size != 0))
21767 return 0;
21769 /* Don't do this if thumb_expand_prologue wants to emit instructions
21770 between the push and the stack frame allocation. */
21771 if (for_prologue
21772 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21773 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21774 return 0;
21776 reg_base = 0;
21777 n_free = 0;
21778 if (!for_prologue)
21780 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21781 live_regs_mask >>= reg_base;
21784 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21785 && (for_prologue || call_used_regs[reg_base + n_free]))
21787 live_regs_mask >>= 1;
21788 n_free++;
21791 if (n_free == 0)
21792 return 0;
21793 gcc_assert (amount / 4 * 4 == amount);
21795 if (amount >= 512 && (amount - n_free * 4) < 512)
21796 return (amount - 508) / 4;
21797 if (amount <= n_free * 4)
21798 return amount / 4;
21799 return 0;
21802 /* The bits which aren't usefully expanded as rtl. */
21803 const char *
21804 thumb1_unexpanded_epilogue (void)
21806 arm_stack_offsets *offsets;
21807 int regno;
21808 unsigned long live_regs_mask = 0;
21809 int high_regs_pushed = 0;
21810 int extra_pop;
21811 int had_to_push_lr;
21812 int size;
21814 if (cfun->machine->return_used_this_function != 0)
21815 return "";
21817 if (IS_NAKED (arm_current_func_type ()))
21818 return "";
21820 offsets = arm_get_frame_offsets ();
21821 live_regs_mask = offsets->saved_regs_mask;
21822 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21824 /* Deduce the registers used from the function's return value.
21825 This is more reliable than examining df_regs_ever_live_p () because that
21826 will be set if the register is ever used in the function, not just if
21827 the register is used to hold a return value. */
21828 size = arm_size_return_regs ();
21830 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21831 if (extra_pop > 0)
21833 unsigned long extra_mask = (1 << extra_pop) - 1;
21834 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21835 / UNITS_PER_WORD);
21838 /* The prolog may have pushed some high registers to use as
21839 work registers. e.g. the testsuite file:
21840 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21841 compiles to produce:
21842 push {r4, r5, r6, r7, lr}
21843 mov r7, r9
21844 mov r6, r8
21845 push {r6, r7}
21846 as part of the prolog. We have to undo that pushing here. */
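/* Illustrative undo sequence for the example above (assuming r2 and r3
   are not needed for the return value):
        pop  {r2, r3}
        mov  r8, r2
        mov  r9, r3
   followed by the usual "pop {r4, r5, r6, r7, pc}".  */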
21848 if (high_regs_pushed)
21850 unsigned long mask = live_regs_mask & 0xff;
21851 int next_hi_reg;
21853 /* The available low registers depend on the size of the value we are
21854 returning. */
21855 if (size <= 12)
21856 mask |= 1 << 3;
21857 if (size <= 8)
21858 mask |= 1 << 2;
21860 if (mask == 0)
21861 /* Oh dear! We have no low registers into which we can pop
21862 high registers! */
21863 internal_error
21864 ("no low registers available for popping high registers");
21866 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21867 if (live_regs_mask & (1 << next_hi_reg))
21868 break;
21870 while (high_regs_pushed)
21872 /* Find lo register(s) into which the high register(s) can
21873 be popped. */
21874 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21876 if (mask & (1 << regno))
21877 high_regs_pushed--;
21878 if (high_regs_pushed == 0)
21879 break;
21882 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21884 /* Pop the values into the low register(s). */
21885 thumb_pop (asm_out_file, mask);
21887 /* Move the value(s) into the high registers. */
21888 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21890 if (mask & (1 << regno))
21892 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21893 regno);
21895 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21896 if (live_regs_mask & (1 << next_hi_reg))
21897 break;
21901 live_regs_mask &= ~0x0f00;
21904 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21905 live_regs_mask &= 0xff;
21907 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21909 /* Pop the return address into the PC. */
21910 if (had_to_push_lr)
21911 live_regs_mask |= 1 << PC_REGNUM;
21913 /* Either no argument registers were pushed or a backtrace
21914 structure was created which includes an adjusted stack
21915 pointer, so just pop everything. */
21916 if (live_regs_mask)
21917 thumb_pop (asm_out_file, live_regs_mask);
21919 /* We have either just popped the return address into the
21920 PC or it was kept in LR for the entire function.
21921 Note that thumb_pop has already called thumb_exit if the
21922 PC was in the list. */
21923 if (!had_to_push_lr)
21924 thumb_exit (asm_out_file, LR_REGNUM);
21926 else
21928 /* Pop everything but the return address. */
21929 if (live_regs_mask)
21930 thumb_pop (asm_out_file, live_regs_mask);
21932 if (had_to_push_lr)
21934 if (size > 12)
21936 /* We have no free low regs, so save one. */
21937 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21938 LAST_ARG_REGNUM);
21941 /* Get the return address into a temporary register. */
21942 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21944 if (size > 12)
21946 /* Move the return address to lr. */
21947 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21948 LAST_ARG_REGNUM);
21949 /* Restore the low register. */
21950 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21951 IP_REGNUM);
21952 regno = LR_REGNUM;
21954 else
21955 regno = LAST_ARG_REGNUM;
21957 else
21958 regno = LR_REGNUM;
21960 /* Remove the argument registers that were pushed onto the stack. */
21961 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21962 SP_REGNUM, SP_REGNUM,
21963 crtl->args.pretend_args_size);
21965 thumb_exit (asm_out_file, regno);
21968 return "";
21971 /* Functions to save and restore machine-specific function data. */
21972 static struct machine_function *
21973 arm_init_machine_status (void)
21975 struct machine_function *machine;
21976 machine = ggc_alloc_cleared_machine_function ();
21978 #if ARM_FT_UNKNOWN != 0
21979 machine->func_type = ARM_FT_UNKNOWN;
21980 #endif
21981 return machine;
21984 /* Return an RTX indicating where the return address to the
21985 calling function can be found. */
21987 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21989 if (count != 0)
21990 return NULL_RTX;
21992 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21995 /* Do anything needed before RTL is emitted for each function. */
21996 void
21997 arm_init_expanders (void)
21999 /* Arrange to initialize and mark the machine per-function status. */
22000 init_machine_status = arm_init_machine_status;
22002 /* This is to stop the combine pass optimizing away the alignment
22003 adjustment of va_arg. */
22004 /* ??? It is claimed that this should not be necessary. */
22005 if (cfun)
22006 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22010 /* Like arm_compute_initial_elimination_offset. Simpler because there
22011 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22012 to point at the base of the local variables after static stack
22013 space for a function has been allocated. */
22015 HOST_WIDE_INT
22016 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22018 arm_stack_offsets *offsets;
22020 offsets = arm_get_frame_offsets ();
22022 switch (from)
22024 case ARG_POINTER_REGNUM:
22025 switch (to)
22027 case STACK_POINTER_REGNUM:
22028 return offsets->outgoing_args - offsets->saved_args;
22030 case FRAME_POINTER_REGNUM:
22031 return offsets->soft_frame - offsets->saved_args;
22033 case ARM_HARD_FRAME_POINTER_REGNUM:
22034 return offsets->saved_regs - offsets->saved_args;
22036 case THUMB_HARD_FRAME_POINTER_REGNUM:
22037 return offsets->locals_base - offsets->saved_args;
22039 default:
22040 gcc_unreachable ();
22042 break;
22044 case FRAME_POINTER_REGNUM:
22045 switch (to)
22047 case STACK_POINTER_REGNUM:
22048 return offsets->outgoing_args - offsets->soft_frame;
22050 case ARM_HARD_FRAME_POINTER_REGNUM:
22051 return offsets->saved_regs - offsets->soft_frame;
22053 case THUMB_HARD_FRAME_POINTER_REGNUM:
22054 return offsets->locals_base - offsets->soft_frame;
22056 default:
22057 gcc_unreachable ();
22059 break;
22061 default:
22062 gcc_unreachable ();
22066 /* Generate the function's prologue. */
22068 void
22069 thumb1_expand_prologue (void)
22071 rtx insn;
22073 HOST_WIDE_INT amount;
22074 arm_stack_offsets *offsets;
22075 unsigned long func_type;
22076 int regno;
22077 unsigned long live_regs_mask;
22078 unsigned long l_mask;
22079 unsigned high_regs_pushed = 0;
22081 func_type = arm_current_func_type ();
22083 /* Naked functions don't have prologues. */
22084 if (IS_NAKED (func_type))
22085 return;
22087 if (IS_INTERRUPT (func_type))
22089 error ("interrupt Service Routines cannot be coded in Thumb mode");
22090 return;
22093 if (is_called_in_ARM_mode (current_function_decl))
22094 emit_insn (gen_prologue_thumb1_interwork ());
22096 offsets = arm_get_frame_offsets ();
22097 live_regs_mask = offsets->saved_regs_mask;
22099 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22100 l_mask = live_regs_mask & 0x40ff;
22101 /* Then count how many other high registers will need to be pushed. */
22102 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22104 if (crtl->args.pretend_args_size)
22106 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22108 if (cfun->machine->uses_anonymous_args)
22110 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22111 unsigned long mask;
22113 mask = 1ul << (LAST_ARG_REGNUM + 1);
22114 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
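/* Worked example (illustrative): with 8 bytes of pretend arguments,
   num_pushes is 2, so mask = (1 << 4) - (1 << 2) = 0x0c, i.e. the
   top two argument registers r2 and r3 are pushed.  */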
22116 insn = thumb1_emit_multi_reg_push (mask, 0);
22118 else
22120 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22121 stack_pointer_rtx, x));
22123 RTX_FRAME_RELATED_P (insn) = 1;
22126 if (TARGET_BACKTRACE)
22128 HOST_WIDE_INT offset = 0;
22129 unsigned work_register;
22130 rtx work_reg, x, arm_hfp_rtx;
22132 /* We have been asked to create a stack backtrace structure.
22133 The code looks like this:
22135 0 .align 2
22136 0 func:
22137 0 sub SP, #16 Reserve space for 4 registers.
22138 2 push {R7} Push low registers.
22139 4 add R7, SP, #20 Get the stack pointer before the push.
22140 6 str R7, [SP, #8] Store the stack pointer
22141 (before reserving the space).
22142 8 mov R7, PC Get hold of the start of this code + 12.
22143 10 str R7, [SP, #16] Store it.
22144 12 mov R7, FP Get hold of the current frame pointer.
22145 14 str R7, [SP, #4] Store it.
22146 16 mov R7, LR Get hold of the current return address.
22147 18 str R7, [SP, #12] Store it.
22148 20 add R7, SP, #16 Point at the start of the
22149 backtrace structure.
22150 22 mov FP, R7 Put this value into the frame pointer. */
22152 work_register = thumb_find_work_register (live_regs_mask);
22153 work_reg = gen_rtx_REG (SImode, work_register);
22154 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22156 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22157 stack_pointer_rtx, GEN_INT (-16)));
22158 RTX_FRAME_RELATED_P (insn) = 1;
22160 if (l_mask)
22162 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22163 RTX_FRAME_RELATED_P (insn) = 1;
22165 offset = bit_count (l_mask) * UNITS_PER_WORD;
22168 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22169 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22171 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22172 x = gen_frame_mem (SImode, x);
22173 emit_move_insn (x, work_reg);
22175 /* Make sure that the instruction fetching the PC is in the right place
22176 to calculate "start of backtrace creation code + 12". */
22177 /* ??? The stores using the common WORK_REG ought to be enough to
22178 prevent the scheduler from doing anything weird. Failing that
22179 we could always move all of the following into an UNSPEC_VOLATILE. */
22180 if (l_mask)
22182 x = gen_rtx_REG (SImode, PC_REGNUM);
22183 emit_move_insn (work_reg, x);
22185 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22186 x = gen_frame_mem (SImode, x);
22187 emit_move_insn (x, work_reg);
22189 emit_move_insn (work_reg, arm_hfp_rtx);
22191 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22192 x = gen_frame_mem (SImode, x);
22193 emit_move_insn (x, work_reg);
22195 else
22197 emit_move_insn (work_reg, arm_hfp_rtx);
22199 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22200 x = gen_frame_mem (SImode, x);
22201 emit_move_insn (x, work_reg);
22203 x = gen_rtx_REG (SImode, PC_REGNUM);
22204 emit_move_insn (work_reg, x);
22206 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22207 x = gen_frame_mem (SImode, x);
22208 emit_move_insn (x, work_reg);
22211 x = gen_rtx_REG (SImode, LR_REGNUM);
22212 emit_move_insn (work_reg, x);
22214 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22215 x = gen_frame_mem (SImode, x);
22216 emit_move_insn (x, work_reg);
22218 x = GEN_INT (offset + 12);
22219 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22221 emit_move_insn (arm_hfp_rtx, work_reg);
22223 /* Optimization: If we are not pushing any low registers but we are going
22224 to push some high registers then delay our first push. This will just
22225 be a push of LR and we can combine it with the push of the first high
22226 register. */
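/* Illustrative effect (not from the original source): if only LR is
   live among the registers handled here and r8 must also be saved,
   instead of
        push {lr}
        mov  r1, r8
        push {r1}
   the push of LR is delayed and merged into the high-register push:
        mov  r1, r8
        push {r1, lr}
   (r1 here is just a hypothetical work register).  */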
22227 else if ((l_mask & 0xff) != 0
22228 || (high_regs_pushed == 0 && l_mask))
22230 unsigned long mask = l_mask;
22231 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22232 insn = thumb1_emit_multi_reg_push (mask, mask);
22233 RTX_FRAME_RELATED_P (insn) = 1;
22236 if (high_regs_pushed)
22238 unsigned pushable_regs;
22239 unsigned next_hi_reg;
22241 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22242 if (live_regs_mask & (1 << next_hi_reg))
22243 break;
22245 pushable_regs = l_mask & 0xff;
22247 if (pushable_regs == 0)
22248 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22250 while (high_regs_pushed > 0)
22252 unsigned long real_regs_mask = 0;
22254 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22256 if (pushable_regs & (1 << regno))
22258 emit_move_insn (gen_rtx_REG (SImode, regno),
22259 gen_rtx_REG (SImode, next_hi_reg));
22261 high_regs_pushed --;
22262 real_regs_mask |= (1 << next_hi_reg);
22264 if (high_regs_pushed)
22266 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22267 next_hi_reg --)
22268 if (live_regs_mask & (1 << next_hi_reg))
22269 break;
22271 else
22273 pushable_regs &= ~((1 << regno) - 1);
22274 break;
22279 /* If we had to find a work register and we have not yet
22280 saved the LR then add it to the list of regs to push. */
22281 if (l_mask == (1 << LR_REGNUM))
22283 pushable_regs |= l_mask;
22284 real_regs_mask |= l_mask;
22285 l_mask = 0;
22288 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22289 RTX_FRAME_RELATED_P (insn) = 1;
22293 /* Load the pic register before setting the frame pointer,
22294 so we can use r7 as a temporary work register. */
22295 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22296 arm_load_pic_register (live_regs_mask);
22298 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22299 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22300 stack_pointer_rtx);
22302 if (flag_stack_usage_info)
22303 current_function_static_stack_size
22304 = offsets->outgoing_args - offsets->saved_args;
22306 amount = offsets->outgoing_args - offsets->saved_regs;
22307 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22308 if (amount)
22310 if (amount < 512)
22312 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22313 GEN_INT (- amount)));
22314 RTX_FRAME_RELATED_P (insn) = 1;
22316 else
22318 rtx reg, dwarf;
22320 /* The stack decrement is too big for an immediate value in a single
22321 insn. In theory we could issue multiple subtracts, but after
22322 three of them it becomes more space efficient to place the full
22323 value in the constant pool and load into a register. (Also the
22324 ARM debugger really likes to see only one stack decrement per
22325 function). So instead we look for a scratch register into which
22326 we can load the decrement, and then we subtract this from the
22327 stack pointer. Unfortunately on the thumb the only available
22328 scratch registers are the argument registers, and we cannot use
22329 these as they may hold arguments to the function. Instead we
22330 attempt to locate a call preserved register which is used by this
22331 function. If we can find one, then we know that it will have
22332 been pushed at the start of the prologue and so we can corrupt
22333 it now. */
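/* Illustrative result (assuming r4 was pushed in the prologue and the
   frame needs 1024 bytes):
        ldr  r4, =-1024      @ constant loaded, e.g. from the literal pool
        add  sp, r4
   The exact instruction sequence chosen for the load depends on how
   gen_movsi expands the constant.  */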
22334 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22335 if (live_regs_mask & (1 << regno))
22336 break;
22338 gcc_assert(regno <= LAST_LO_REGNUM);
22340 reg = gen_rtx_REG (SImode, regno);
22342 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22344 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22345 stack_pointer_rtx, reg));
22347 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22348 plus_constant (Pmode, stack_pointer_rtx,
22349 -amount));
22350 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22351 RTX_FRAME_RELATED_P (insn) = 1;
22355 if (frame_pointer_needed)
22356 thumb_set_frame_pointer (offsets);
22358 /* If we are profiling, make sure no instructions are scheduled before
22359 the call to mcount. Similarly if the user has requested no
22360 scheduling in the prolog. Similarly if we want non-call exceptions
22361 using the EABI unwinder, to prevent faulting instructions from being
22362 swapped with a stack adjustment. */
22363 if (crtl->profile || !TARGET_SCHED_PROLOG
22364 || (arm_except_unwind_info (&global_options) == UI_TARGET
22365 && cfun->can_throw_non_call_exceptions))
22366 emit_insn (gen_blockage ());
22368 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22369 if (live_regs_mask & 0xff)
22370 cfun->machine->lr_save_eliminated = 0;
22373 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
22374 POP instruction can be generated. LR should be replaced by PC. All
22375 the checks required are already done by USE_RETURN_INSN (). Hence,
22376 all we really need to check here is whether a single register or
22377 multiple registers are to be popped. */
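/* Sketch of the two result shapes (illustrative, not from the original
   source): when only LR was saved, the return roughly collapses to a
   single "ldr pc, [sp], #4"; when several registers were saved, LR is
   replaced by PC in the mask and a "pop {..., pc}" is emitted
   instead.  */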
22378 void
22379 thumb2_expand_return (void)
22381 int i, num_regs;
22382 unsigned long saved_regs_mask;
22383 arm_stack_offsets *offsets;
22385 offsets = arm_get_frame_offsets ();
22386 saved_regs_mask = offsets->saved_regs_mask;
22388 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22389 if (saved_regs_mask & (1 << i))
22390 num_regs++;
22392 if (saved_regs_mask)
22394 if (num_regs == 1)
22396 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22397 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22398 rtx addr = gen_rtx_MEM (SImode,
22399 gen_rtx_POST_INC (SImode,
22400 stack_pointer_rtx));
22401 set_mem_alias_set (addr, get_frame_alias_set ());
22402 XVECEXP (par, 0, 0) = ret_rtx;
22403 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22404 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22405 emit_jump_insn (par);
22407 else
22409 saved_regs_mask &= ~ (1 << LR_REGNUM);
22410 saved_regs_mask |= (1 << PC_REGNUM);
22411 arm_emit_multi_reg_pop (saved_regs_mask);
22414 else
22416 emit_jump_insn (simple_return_rtx);
22420 void
22421 thumb1_expand_epilogue (void)
22423 HOST_WIDE_INT amount;
22424 arm_stack_offsets *offsets;
22425 int regno;
22428 /* Naked functions don't have epilogues. */
22428 if (IS_NAKED (arm_current_func_type ()))
22429 return;
22431 offsets = arm_get_frame_offsets ();
22432 amount = offsets->outgoing_args - offsets->saved_regs;
22434 if (frame_pointer_needed)
22436 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22437 amount = offsets->locals_base - offsets->saved_regs;
22439 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22441 gcc_assert (amount >= 0);
22442 if (amount)
22444 emit_insn (gen_blockage ());
22446 if (amount < 512)
22447 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22448 GEN_INT (amount)));
22449 else
22451 /* r3 is always free in the epilogue. */
22452 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22454 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22455 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22459 /* Emit a USE (stack_pointer_rtx), so that
22460 the stack adjustment will not be deleted. */
22461 emit_insn (gen_prologue_use (stack_pointer_rtx));
22463 if (crtl->profile || !TARGET_SCHED_PROLOG)
22464 emit_insn (gen_blockage ());
22466 /* Emit a clobber for each register that will be restored in the epilogue,
22467 so that flow2 will get register lifetimes correct. */
22468 for (regno = 0; regno < 13; regno++)
22469 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22470 emit_clobber (gen_rtx_REG (SImode, regno));
22472 if (! df_regs_ever_live_p (LR_REGNUM))
22473 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22476 /* Epilogue code for APCS frame. */
22477 static void
22478 arm_expand_epilogue_apcs_frame (bool really_return)
22480 unsigned long func_type;
22481 unsigned long saved_regs_mask;
22482 int num_regs = 0;
22483 int i;
22484 int floats_from_frame = 0;
22485 arm_stack_offsets *offsets;
22487 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22488 func_type = arm_current_func_type ();
22490 /* Get frame offsets for ARM. */
22491 offsets = arm_get_frame_offsets ();
22492 saved_regs_mask = offsets->saved_regs_mask;
22494 /* Find the offset of the floating-point save area in the frame. */
22495 floats_from_frame = offsets->saved_args - offsets->frame;
22497 /* Compute how many core registers are saved and how far away the floats are. */
22498 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22499 if (saved_regs_mask & (1 << i))
22501 num_regs++;
22502 floats_from_frame += 4;
22505 if (TARGET_HARD_FLOAT && TARGET_VFP)
22507 int start_reg;
22509 /* The offset is from IP_REGNUM. */
22510 int saved_size = arm_get_vfp_saved_size ();
22511 if (saved_size > 0)
22513 floats_from_frame += saved_size;
22514 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22515 hard_frame_pointer_rtx,
22516 GEN_INT (-floats_from_frame)));
22519 /* Generate VFP register multi-pop. */
22520 start_reg = FIRST_VFP_REGNUM;
22522 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22523 /* Look for a case where a reg does not need restoring. */
22524 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22525 && (!df_regs_ever_live_p (i + 1)
22526 || call_used_regs[i + 1]))
22528 if (start_reg != i)
22529 arm_emit_vfp_multi_reg_pop (start_reg,
22530 (i - start_reg) / 2,
22531 gen_rtx_REG (SImode,
22532 IP_REGNUM));
22533 start_reg = i + 2;
22536 /* Restore the remaining regs that we have discovered (or possibly
22537 even all of them, if the conditional in the for loop never
22538 fired). */
22539 if (start_reg != i)
22540 arm_emit_vfp_multi_reg_pop (start_reg,
22541 (i - start_reg) / 2,
22542 gen_rtx_REG (SImode, IP_REGNUM));
22545 if (TARGET_IWMMXT)
22547 /* The frame pointer is guaranteed to be non-double-word aligned, as
22548 it is set to double-word-aligned old_stack_pointer - 4. */
22549 rtx insn;
22550 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22552 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22553 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22555 rtx addr = gen_frame_mem (V2SImode,
22556 plus_constant (Pmode, hard_frame_pointer_rtx,
22557 - lrm_count * 4));
22558 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22559 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22560 gen_rtx_REG (V2SImode, i),
22561 NULL_RTX);
22562 lrm_count += 2;
22566 /* saved_regs_mask should contain IP, which holds the old stack pointer
22567 at the time the activation record was created. Since SP and IP are adjacent
22568 registers, we can restore the value directly into SP. */
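/* Illustrative mask rewrite (an assumption about the typical APCS
   frame): a saved mask of {fp, ip, lr, pc} becomes {fp, sp, lr, pc},
   so the LDM that restores the frame reloads SP from the slot where
   IP (the old stack pointer) was stored.  */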
22569 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22570 saved_regs_mask &= ~(1 << IP_REGNUM);
22571 saved_regs_mask |= (1 << SP_REGNUM);
22573 /* There are two registers left in saved_regs_mask - LR and PC. We
22574 only need to restore LR (the return address), but to
22575 save time we can load it directly into PC, unless we need a
22576 special function exit sequence, or we are not really returning. */
22577 if (really_return
22578 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22579 && !crtl->calls_eh_return)
22580 /* Delete LR from the register mask, so that LR on
22581 the stack is loaded into the PC in the register mask. */
22582 saved_regs_mask &= ~(1 << LR_REGNUM);
22583 else
22584 saved_regs_mask &= ~(1 << PC_REGNUM);
22586 num_regs = bit_count (saved_regs_mask);
22587 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22589 /* Unwind the stack to just below the saved registers. */
22590 emit_insn (gen_addsi3 (stack_pointer_rtx,
22591 hard_frame_pointer_rtx,
22592 GEN_INT (- 4 * num_regs)));
22595 arm_emit_multi_reg_pop (saved_regs_mask);
22597 if (IS_INTERRUPT (func_type))
22599 /* Interrupt handlers will have pushed the
22600 IP onto the stack, so restore it now. */
22601 rtx insn;
22602 rtx addr = gen_rtx_MEM (SImode,
22603 gen_rtx_POST_INC (SImode,
22604 stack_pointer_rtx));
22605 set_mem_alias_set (addr, get_frame_alias_set ());
22606 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22607 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22608 gen_rtx_REG (SImode, IP_REGNUM),
22609 NULL_RTX);
22612 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22613 return;
22615 if (crtl->calls_eh_return)
22616 emit_insn (gen_addsi3 (stack_pointer_rtx,
22617 stack_pointer_rtx,
22618 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
22620 if (IS_STACKALIGN (func_type))
22621 /* Restore the original stack pointer. Before prologue, the stack was
22622 realigned and the original stack pointer saved in r0. For details,
22623 see comment in arm_expand_prologue. */
22624 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22626 emit_jump_insn (simple_return_rtx);
22629 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22630 function is not a sibcall. */
22631 void
22632 arm_expand_epilogue (bool really_return)
22634 unsigned long func_type;
22635 unsigned long saved_regs_mask;
22636 int num_regs = 0;
22637 int i;
22638 int amount;
22639 int floats_from_frame = 0;
22640 arm_stack_offsets *offsets;
22642 func_type = arm_current_func_type ();
22644 /* Naked functions don't have epilogues. Hence, generate a return pattern and
22645 let output_return_instruction take care of any instruction emission. */
22646 if (IS_NAKED (func_type)
22647 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22649 emit_jump_insn (simple_return_rtx);
22650 return;
22653 /* If we are throwing an exception, then we really must be doing a
22654 return, so we can't tail-call. */
22655 gcc_assert (!crtl->calls_eh_return || really_return);
22657 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22659 arm_expand_epilogue_apcs_frame (really_return);
22660 return;
22663 /* Get frame offsets for ARM. */
22664 offsets = arm_get_frame_offsets ();
22665 saved_regs_mask = offsets->saved_regs_mask;
22667 /* Find the offset of the floating-point save area from the frame pointer.
22668 The initialization is done this way to account for the frame pointer
22669 and the static-chain register, if they were stored. */
22670 floats_from_frame = offsets->saved_args - offsets->frame;
22671 /* Compute how many registers are saved and how far away the floats will be. */
22672 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22673 if (saved_regs_mask & (1 << i))
22675 num_regs++;
22676 floats_from_frame += 4;
22679 if (frame_pointer_needed)
22681 /* Restore stack pointer if necessary. */
22682 if (TARGET_ARM)
22684 /* In ARM mode, frame pointer points to first saved register.
22685 Restore stack pointer to last saved register. */
22686 amount = offsets->frame - offsets->saved_regs;
22688 /* Force out any pending memory operations that reference stacked data
22689 before stack de-allocation occurs. */
22690 emit_insn (gen_blockage ());
22691 emit_insn (gen_addsi3 (stack_pointer_rtx,
22692 hard_frame_pointer_rtx,
22693 GEN_INT (amount)));
22695 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22696 deleted. */
22697 emit_insn (gen_prologue_use (stack_pointer_rtx));
22699 else
22701 /* In Thumb-2 mode, the frame pointer points to the last saved
22702 register. */
22703 amount = offsets->locals_base - offsets->saved_regs;
22704 if (amount)
22705 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22706 hard_frame_pointer_rtx,
22707 GEN_INT (amount)));
22709 /* Force out any pending memory operations that reference stacked data
22710 before stack de-allocation occurs. */
22711 emit_insn (gen_blockage ());
22712 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22713 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22714 deleted. */
22715 emit_insn (gen_prologue_use (stack_pointer_rtx));
22718 else
22720 /* Pop off outgoing args and local frame to adjust stack pointer to
22721 last saved register. */
22722 amount = offsets->outgoing_args - offsets->saved_regs;
22723 if (amount)
22725 /* Force out any pending memory operations that reference stacked data
22726 before stack de-allocation occurs. */
22727 emit_insn (gen_blockage ());
22728 emit_insn (gen_addsi3 (stack_pointer_rtx,
22729 stack_pointer_rtx,
22730 GEN_INT (amount)));
22731 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22732 not deleted. */
22733 emit_insn (gen_prologue_use (stack_pointer_rtx));
22737 if (TARGET_HARD_FLOAT && TARGET_VFP)
22739 /* Generate VFP register multi-pop. */
22740 int end_reg = LAST_VFP_REGNUM + 1;
22742 /* Scan the registers in reverse order. We need to match
22743 any groupings made in the prologue and generate matching
22744 vldm operations. The need to match groups is because,
22745 unlike pop, vldm can only do consecutive regs. */
22746 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22747 /* Look for a case where a reg does not need restoring. */
22748 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22749 && (!df_regs_ever_live_p (i + 1)
22750 || call_used_regs[i + 1]))
22752 /* Restore the regs discovered so far (from reg+2 to
22753 end_reg). */
22754 if (end_reg > i + 2)
22755 arm_emit_vfp_multi_reg_pop (i + 2,
22756 (end_reg - (i + 2)) / 2,
22757 stack_pointer_rtx);
22758 end_reg = i;
22761 /* Restore the remaining regs that we have discovered (or possibly
22762 even all of them, if the conditional in the for loop never
22763 fired). */
22764 if (end_reg > i + 2)
22765 arm_emit_vfp_multi_reg_pop (i + 2,
22766 (end_reg - (i + 2)) / 2,
22767 stack_pointer_rtx);
22770 if (TARGET_IWMMXT)
22771 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22772 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22774 rtx insn;
22775 rtx addr = gen_rtx_MEM (V2SImode,
22776 gen_rtx_POST_INC (SImode,
22777 stack_pointer_rtx));
22778 set_mem_alias_set (addr, get_frame_alias_set ());
22779 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22780 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22781 gen_rtx_REG (V2SImode, i),
22782 NULL_RTX);
22785 if (saved_regs_mask)
22787 rtx insn;
22788 bool return_in_pc = false;
22790 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22791 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22792 && !IS_STACKALIGN (func_type)
22793 && really_return
22794 && crtl->args.pretend_args_size == 0
22795 && saved_regs_mask & (1 << LR_REGNUM)
22796 && !crtl->calls_eh_return)
22798 saved_regs_mask &= ~(1 << LR_REGNUM);
22799 saved_regs_mask |= (1 << PC_REGNUM);
22800 return_in_pc = true;
22803 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22805 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22806 if (saved_regs_mask & (1 << i))
22808 rtx addr = gen_rtx_MEM (SImode,
22809 gen_rtx_POST_INC (SImode,
22810 stack_pointer_rtx));
22811 set_mem_alias_set (addr, get_frame_alias_set ());
22813 if (i == PC_REGNUM)
22815 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22816 XVECEXP (insn, 0, 0) = ret_rtx;
22817 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22818 gen_rtx_REG (SImode, i),
22819 addr);
22820 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22821 insn = emit_jump_insn (insn);
22823 else
22825 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22826 addr));
22827 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22828 gen_rtx_REG (SImode, i),
22829 NULL_RTX);
22833 else
22835 arm_emit_multi_reg_pop (saved_regs_mask);
22838 if (return_in_pc)
22839 return;
22842 if (crtl->args.pretend_args_size)
22843 emit_insn (gen_addsi3 (stack_pointer_rtx,
22844 stack_pointer_rtx,
22845 GEN_INT (crtl->args.pretend_args_size)));
22847 if (!really_return)
22848 return;
22850 if (crtl->calls_eh_return)
22851 emit_insn (gen_addsi3 (stack_pointer_rtx,
22852 stack_pointer_rtx,
22853 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22855 if (IS_STACKALIGN (func_type))
22856 /* Restore the original stack pointer. Before prologue, the stack was
22857 realigned and the original stack pointer saved in r0. For details,
22858 see comment in arm_expand_prologue. */
22859 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22861 emit_jump_insn (simple_return_rtx);
22864 /* Implementation of insn prologue_thumb1_interwork. This is the first
22865 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22867 const char *
22868 thumb1_output_interwork (void)
22870 const char * name;
22871 FILE *f = asm_out_file;
22873 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22874 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22875 == SYMBOL_REF);
22876 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22878 /* Generate code sequence to switch us into Thumb mode. */
22879 /* The .code 32 directive has already been emitted by
22880 ASM_DECLARE_FUNCTION_NAME. */
22881 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22882 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22884 /* Generate a label, so that the debugger will notice the
22885 change in instruction sets. This label is also used by
22886 the assembler to bypass the ARM code when this function
22887 is called from a Thumb encoded function elsewhere in the
22888 same file. Hence the definition of STUB_NAME here must
22889 agree with the definition in gas/config/tc-arm.c. */
22891 #define STUB_NAME ".real_start_of"
22893 fprintf (f, "\t.code\t16\n");
22894 #ifdef ARM_PE
22895 if (arm_dllexport_name_p (name))
22896 name = arm_strip_name_encoding (name);
22897 #endif
22898 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22899 fprintf (f, "\t.thumb_func\n");
22900 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22902 return "";
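/* Illustrative output (an editorial sketch, assuming the default empty
   user-label prefix): for a function <name> the sequence above expands to
       orr     ip, pc, #1
       bx      ip
       .code   16
       .globl  .real_start_of<name>
       .thumb_func
   .real_start_of<name>:
   after which execution continues in Thumb state at the stub label.  */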
22905 /* Handle the case of a double word load into a low register from
22906 a computed memory address. The computed address may involve a
22907 register which is overwritten by the load. */
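/* A sketch of the ordering constraint handled below: loading a double word
   at [r0] into the pair r0/r1 must fetch the high word first ("ldr r1,
   [r0, #4]") so that the base register is still intact when the final
   "ldr r0, [r0]" overwrites it.  */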
22908 const char *
22909 thumb_load_double_from_address (rtx *operands)
22911 rtx addr;
22912 rtx base;
22913 rtx offset;
22914 rtx arg1;
22915 rtx arg2;
22917 gcc_assert (GET_CODE (operands[0]) == REG);
22918 gcc_assert (GET_CODE (operands[1]) == MEM);
22920 /* Get the memory address. */
22921 addr = XEXP (operands[1], 0);
22923 /* Work out how the memory address is computed. */
22924 switch (GET_CODE (addr))
22926 case REG:
22927 operands[2] = adjust_address (operands[1], SImode, 4);
22929 if (REGNO (operands[0]) == REGNO (addr))
22931 output_asm_insn ("ldr\t%H0, %2", operands);
22932 output_asm_insn ("ldr\t%0, %1", operands);
22934 else
22936 output_asm_insn ("ldr\t%0, %1", operands);
22937 output_asm_insn ("ldr\t%H0, %2", operands);
22939 break;
22941 case CONST:
22942 /* Compute <address> + 4 for the high order load. */
22943 operands[2] = adjust_address (operands[1], SImode, 4);
22945 output_asm_insn ("ldr\t%0, %1", operands);
22946 output_asm_insn ("ldr\t%H0, %2", operands);
22947 break;
22949 case PLUS:
22950 arg1 = XEXP (addr, 0);
22951 arg2 = XEXP (addr, 1);
22953 if (CONSTANT_P (arg1))
22954 base = arg2, offset = arg1;
22955 else
22956 base = arg1, offset = arg2;
22958 gcc_assert (GET_CODE (base) == REG);
22960 /* Catch the case of <address> = <reg> + <reg> */
22961 if (GET_CODE (offset) == REG)
22963 int reg_offset = REGNO (offset);
22964 int reg_base = REGNO (base);
22965 int reg_dest = REGNO (operands[0]);
22967 /* Add the base and offset registers together into the
22968 higher destination register. */
22969 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22970 reg_dest + 1, reg_base, reg_offset);
22972 /* Load the lower destination register from the address in
22973 the higher destination register. */
22974 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22975 reg_dest, reg_dest + 1);
22977 /* Load the higher destination register from its own address
22978 plus 4. */
22979 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22980 reg_dest + 1, reg_dest + 1);
22982 else
22984 /* Compute <address> + 4 for the high order load. */
22985 operands[2] = adjust_address (operands[1], SImode, 4);
22987 /* If the computed address is held in the low order register
22988 then load the high order register first, otherwise always
22989 load the low order register first. */
22990 if (REGNO (operands[0]) == REGNO (base))
22992 output_asm_insn ("ldr\t%H0, %2", operands);
22993 output_asm_insn ("ldr\t%0, %1", operands);
22995 else
22997 output_asm_insn ("ldr\t%0, %1", operands);
22998 output_asm_insn ("ldr\t%H0, %2", operands);
23001 break;
23003 case LABEL_REF:
23004 /* With no registers to worry about we can just load the value
23005 directly. */
23006 operands[2] = adjust_address (operands[1], SImode, 4);
23008 output_asm_insn ("ldr\t%H0, %2", operands);
23009 output_asm_insn ("ldr\t%0, %1", operands);
23010 break;
23012 default:
23013 gcc_unreachable ();
23016 return "";
23019 const char *
23020 thumb_output_move_mem_multiple (int n, rtx *operands)
23022 rtx tmp;
23024 switch (n)
23026 case 2:
23027 if (REGNO (operands[4]) > REGNO (operands[5]))
23029 tmp = operands[4];
23030 operands[4] = operands[5];
23031 operands[5] = tmp;
23033 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23034 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23035 break;
23037 case 3:
23038 if (REGNO (operands[4]) > REGNO (operands[5]))
23040 tmp = operands[4];
23041 operands[4] = operands[5];
23042 operands[5] = tmp;
23044 if (REGNO (operands[5]) > REGNO (operands[6]))
23046 tmp = operands[5];
23047 operands[5] = operands[6];
23048 operands[6] = tmp;
23050 if (REGNO (operands[4]) > REGNO (operands[5]))
23052 tmp = operands[4];
23053 operands[4] = operands[5];
23054 operands[5] = tmp;
23057 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23058 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23059 break;
23061 default:
23062 gcc_unreachable ();
23065 return "";
23068 /* Output a call-via instruction for thumb state. */
23069 const char *
23070 thumb_call_via_reg (rtx reg)
23072 int regno = REGNO (reg);
23073 rtx *labelp;
23075 gcc_assert (regno < LR_REGNUM);
23077 /* If we are in the normal text section we can use a single instance
23078 per compilation unit. If we are doing function sections, then we need
23079 an entry per section, since we can't rely on reachability. */
23080 if (in_section == text_section)
23082 thumb_call_reg_needed = 1;
23084 if (thumb_call_via_label[regno] == NULL)
23085 thumb_call_via_label[regno] = gen_label_rtx ();
23086 labelp = thumb_call_via_label + regno;
23088 else
23090 if (cfun->machine->call_via[regno] == NULL)
23091 cfun->machine->call_via[regno] = gen_label_rtx ();
23092 labelp = cfun->machine->call_via + regno;
23095 output_asm_insn ("bl\t%a0", labelp);
23096 return "";
23099 /* Routines for generating rtl. */
23100 void
23101 thumb_expand_movmemqi (rtx *operands)
23103 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23104 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23105 HOST_WIDE_INT len = INTVAL (operands[2]);
23106 HOST_WIDE_INT offset = 0;
23108 while (len >= 12)
23110 emit_insn (gen_movmem12b (out, in, out, in));
23111 len -= 12;
23114 if (len >= 8)
23116 emit_insn (gen_movmem8b (out, in, out, in));
23117 len -= 8;
23120 if (len >= 4)
23122 rtx reg = gen_reg_rtx (SImode);
23123 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23124 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23125 len -= 4;
23126 offset += 4;
23129 if (len >= 2)
23131 rtx reg = gen_reg_rtx (HImode);
23132 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23133 plus_constant (Pmode, in,
23134 offset))));
23135 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23136 offset)),
23137 reg));
23138 len -= 2;
23139 offset += 2;
23142 if (len)
23144 rtx reg = gen_reg_rtx (QImode);
23145 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23146 plus_constant (Pmode, in,
23147 offset))));
23148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23149 offset)),
23150 reg));
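/* Illustrative expansion (hypothetical len of 23 bytes): one movmem12b,
   one movmem8b, then a halfword copy and a final byte copy; each step is
   skipped once the remaining length is smaller than its chunk size.  */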
23154 void
23155 thumb_reload_out_hi (rtx *operands)
23157 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23160 /* Handle reading a half-word from memory during reload. */
23161 void
23162 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23164 gcc_unreachable ();
23167 /* Return the length of a function name prefix
23168 that starts with the character 'c'. */
23169 static int
23170 arm_get_strip_length (int c)
23172 switch (c)
23174 ARM_NAME_ENCODING_LENGTHS
23175 default: return 0;
23179 /* Return a pointer to a function's name with any
23180 and all prefix encodings stripped from it. */
23181 const char *
23182 arm_strip_name_encoding (const char *name)
23184 int skip;
23186 while ((skip = arm_get_strip_length (* name)))
23187 name += skip;
23189 return name;
23192 /* If there is a '*' anywhere in the name's prefix, then
23193 emit the stripped name verbatim, otherwise prepend an
23194 underscore if leading underscores are being used. */
23195 void
23196 arm_asm_output_labelref (FILE *stream, const char *name)
23198 int skip;
23199 int verbatim = 0;
23201 while ((skip = arm_get_strip_length (* name)))
23203 verbatim |= (*name == '*');
23204 name += skip;
23207 if (verbatim)
23208 fputs (name, stream);
23209 else
23210 asm_fprintf (stream, "%U%s", name);
23213 /* This function is used to emit an EABI tag and its associated value.
23214 We emit the numerical value of the tag in case the assembler does not
23215 support textual tags (e.g. gas prior to 2.20). If requested, we include
23216 the tag name in a comment so that anyone reading the assembler output
23217 will know which tag is being set.
23219 This function is not static because arm-c.c needs it too. */
23221 void
23222 arm_emit_eabi_attribute (const char *name, int num, int val)
23224 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23225 if (flag_verbose_asm || flag_debug_asm)
23226 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23227 asm_fprintf (asm_out_file, "\n");
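/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2) writes
   "\t.eabi_attribute 26, 2", followed by "\t@ Tag_ABI_enum_size" when
   -fverbose-asm or -dA is in effect.  */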
23230 static void
23231 arm_file_start (void)
23233 int val;
23235 if (TARGET_UNIFIED_ASM)
23236 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23238 if (TARGET_BPABI)
23240 const char *fpu_name;
23241 if (arm_selected_arch)
23242 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23243 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23244 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23245 else
23246 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23248 if (TARGET_SOFT_FLOAT)
23250 fpu_name = "softvfp";
23252 else
23254 fpu_name = arm_fpu_desc->name;
23255 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23257 if (TARGET_HARD_FLOAT)
23258 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23259 if (TARGET_HARD_FLOAT_ABI)
23260 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23263 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23265 /* Some of these attributes only apply when the corresponding features
23266 are used. However we don't have any easy way of figuring this out.
23267 Conservatively record the setting that would have been used. */
23269 if (flag_rounding_math)
23270 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23272 if (!flag_unsafe_math_optimizations)
23274 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23275 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23277 if (flag_signaling_nans)
23278 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23280 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23281 flag_finite_math_only ? 1 : 3);
23283 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23284 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23285 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23286 flag_short_enums ? 1 : 2);
23288 /* Tag_ABI_optimization_goals. */
23289 if (optimize_size)
23290 val = 4;
23291 else if (optimize >= 2)
23292 val = 2;
23293 else if (optimize)
23294 val = 1;
23295 else
23296 val = 6;
23297 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23299 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23300 unaligned_access);
23302 if (arm_fp16_format)
23303 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23304 (int) arm_fp16_format);
23306 if (arm_lang_output_object_attributes_hook)
23307 arm_lang_output_object_attributes_hook();
23310 default_file_start ();
23313 static void
23314 arm_file_end (void)
23316 int regno;
23318 if (NEED_INDICATE_EXEC_STACK)
23319 /* Add .note.GNU-stack. */
23320 file_end_indicate_exec_stack ();
23322 if (! thumb_call_reg_needed)
23323 return;
23325 switch_to_section (text_section);
23326 asm_fprintf (asm_out_file, "\t.code 16\n");
23327 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23329 for (regno = 0; regno < LR_REGNUM; regno++)
23331 rtx label = thumb_call_via_label[regno];
23333 if (label != 0)
23335 targetm.asm_out.internal_label (asm_out_file, "L",
23336 CODE_LABEL_NUMBER (label));
23337 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23342 #ifndef ARM_PE
23343 /* Symbols in the text segment can be accessed without indirecting via the
23344 constant pool; it may take an extra binary operation, but this is still
23345 faster than indirecting via memory. Don't do this when not optimizing,
23346 since we won't be calculating all of the offsets necessary to do this
23347 simplification. */
23349 static void
23350 arm_encode_section_info (tree decl, rtx rtl, int first)
23352 if (optimize > 0 && TREE_CONSTANT (decl))
23353 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23355 default_encode_section_info (decl, rtl, first);
23357 #endif /* !ARM_PE */
23359 static void
23360 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23362 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23363 && !strcmp (prefix, "L"))
23365 arm_ccfsm_state = 0;
23366 arm_target_insn = NULL;
23368 default_internal_label (stream, prefix, labelno);
23371 /* Output code to add DELTA to the first argument, and then jump
23372 to FUNCTION. Used for C++ multiple inheritance. */
23373 static void
23374 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23375 HOST_WIDE_INT delta,
23376 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23377 tree function)
23379 static int thunk_label = 0;
23380 char label[256];
23381 char labelpc[256];
23382 int mi_delta = delta;
23383 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23384 int shift = 0;
23385 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23386 ? 1 : 0);
23387 if (mi_delta < 0)
23388 mi_delta = - mi_delta;
23390 if (TARGET_THUMB1)
23392 int labelno = thunk_label++;
23393 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23394 /* Thunks are entered in ARM mode when available. */
23395 if (TARGET_THUMB1_ONLY)
23397 /* push r3 so we can use it as a temporary. */
23398 /* TODO: Omit this save if r3 is not used. */
23399 fputs ("\tpush {r3}\n", file);
23400 fputs ("\tldr\tr3, ", file);
23402 else
23404 fputs ("\tldr\tr12, ", file);
23406 assemble_name (file, label);
23407 fputc ('\n', file);
23408 if (flag_pic)
23410 /* If we are generating PIC, the ldr instruction below loads
23411 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23412 the address of the add + 8, so we have:
23414 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23415 = target + 1.
23417 Note that we have "+ 1" because some versions of GNU ld
23418 don't set the low bit of the result for R_ARM_REL32
23419 relocations against thumb function symbols.
23420 On ARMv6M this is +4, not +8. */
23421 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23422 assemble_name (file, labelpc);
23423 fputs (":\n", file);
23424 if (TARGET_THUMB1_ONLY)
23426 /* This is 2 insns after the start of the thunk, so we know it
23427 is 4-byte aligned. */
23428 fputs ("\tadd\tr3, pc, r3\n", file);
23429 fputs ("\tmov r12, r3\n", file);
23431 else
23432 fputs ("\tadd\tr12, pc, r12\n", file);
23434 else if (TARGET_THUMB1_ONLY)
23435 fputs ("\tmov r12, r3\n", file);
23437 if (TARGET_THUMB1_ONLY)
23439 if (mi_delta > 255)
23441 fputs ("\tldr\tr3, ", file);
23442 assemble_name (file, label);
23443 fputs ("+4\n", file);
23444 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23445 mi_op, this_regno, this_regno);
23447 else if (mi_delta != 0)
23449 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23450 mi_op, this_regno, this_regno,
23451 mi_delta);
23454 else
23456 /* TODO: Use movw/movt for large constants when available. */
23457 while (mi_delta != 0)
23459 if ((mi_delta & (3 << shift)) == 0)
23460 shift += 2;
23461 else
23463 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23464 mi_op, this_regno, this_regno,
23465 mi_delta & (0xff << shift));
23466 mi_delta &= ~(0xff << shift);
23467 shift += 8;
23471 if (TARGET_THUMB1)
23473 if (TARGET_THUMB1_ONLY)
23474 fputs ("\tpop\t{r3}\n", file);
23476 fprintf (file, "\tbx\tr12\n");
23477 ASM_OUTPUT_ALIGN (file, 2);
23478 assemble_name (file, label);
23479 fputs (":\n", file);
23480 if (flag_pic)
23482 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23483 rtx tem = XEXP (DECL_RTL (function), 0);
23484 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23485 tem = gen_rtx_MINUS (GET_MODE (tem),
23486 tem,
23487 gen_rtx_SYMBOL_REF (Pmode,
23488 ggc_strdup (labelpc)));
23489 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23491 else
23492 /* Output ".word .LTHUNKn". */
23493 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23495 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23496 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23498 else
23500 fputs ("\tb\t", file);
23501 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23502 if (NEED_PLT_RELOC)
23503 fputs ("(PLT)", file);
23504 fputc ('\n', file);
23509 arm_emit_vector_const (FILE *file, rtx x)
23511 int i;
23512 const char * pattern;
23514 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23516 switch (GET_MODE (x))
23518 case V2SImode: pattern = "%08x"; break;
23519 case V4HImode: pattern = "%04x"; break;
23520 case V8QImode: pattern = "%02x"; break;
23521 default: gcc_unreachable ();
23524 fprintf (file, "0x");
23525 for (i = CONST_VECTOR_NUNITS (x); i--;)
23527 rtx element;
23529 element = CONST_VECTOR_ELT (x, i);
23530 fprintf (file, pattern, INTVAL (element));
23533 return 1;
23536 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23537 HFmode constant pool entries are actually loaded with ldr. */
23538 void
23539 arm_emit_fp16_const (rtx c)
23541 REAL_VALUE_TYPE r;
23542 long bits;
23544 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23545 bits = real_to_target (NULL, &r, HFmode);
23546 if (WORDS_BIG_ENDIAN)
23547 assemble_zeros (2);
23548 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23549 if (!WORDS_BIG_ENDIAN)
23550 assemble_zeros (2);
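/* Illustrative encoding: the HFmode constant 1.0 has the target bit
   pattern 0x3c00, so a little-endian build emits the half-word 0x3c00
   followed by two bytes of zero padding (the order is reversed when
   WORDS_BIG_ENDIAN).  */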
23553 const char *
23554 arm_output_load_gr (rtx *operands)
23556 rtx reg;
23557 rtx offset;
23558 rtx wcgr;
23559 rtx sum;
23561 if (GET_CODE (operands [1]) != MEM
23562 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23563 || GET_CODE (reg = XEXP (sum, 0)) != REG
23564 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23565 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23566 return "wldrw%?\t%0, %1";
23568 /* Fix up an out-of-range load of a GR register. */
23569 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23570 wcgr = operands[0];
23571 operands[0] = reg;
23572 output_asm_insn ("ldr%?\t%0, %1", operands);
23574 operands[0] = wcgr;
23575 operands[1] = reg;
23576 output_asm_insn ("tmcr%?\t%0, %1", operands);
23577 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23579 return "";
23582 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23584 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23585 named arg and all anonymous args onto the stack.
23586 XXX I know the prologue shouldn't be pushing registers, but it is faster
23587 that way. */
23589 static void
23590 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23591 enum machine_mode mode,
23592 tree type,
23593 int *pretend_size,
23594 int second_time ATTRIBUTE_UNUSED)
23596 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23597 int nregs;
23599 cfun->machine->uses_anonymous_args = 1;
23600 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23602 nregs = pcum->aapcs_ncrn;
23603 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23604 nregs++;
23606 else
23607 nregs = pcum->nregs;
23609 if (nregs < NUM_ARG_REGS)
23610 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
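/* Worked example (a sketch for AAPCS, hypothetical "int f (int a, ...)"
   with no doubleword-alignment bump): the one named argument occupies r0,
   so nregs is 1 and *pretend_size becomes (4 - 1) * UNITS_PER_WORD = 12,
   i.e. the prologue pushes r1-r3.  */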
23613 /* Return nonzero if the CONSUMER instruction (a store) does not need
23614 PRODUCER's value to calculate the address. */
23617 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23619 rtx value = PATTERN (producer);
23620 rtx addr = PATTERN (consumer);
23622 if (GET_CODE (value) == COND_EXEC)
23623 value = COND_EXEC_CODE (value);
23624 if (GET_CODE (value) == PARALLEL)
23625 value = XVECEXP (value, 0, 0);
23626 value = XEXP (value, 0);
23627 if (GET_CODE (addr) == COND_EXEC)
23628 addr = COND_EXEC_CODE (addr);
23629 if (GET_CODE (addr) == PARALLEL)
23630 addr = XVECEXP (addr, 0, 0);
23631 addr = XEXP (addr, 0);
23633 return !reg_overlap_mentioned_p (value, addr);
23636 /* Return nonzero if the CONSUMER instruction (a store) does need
23637 PRODUCER's value to calculate the address. */
23640 arm_early_store_addr_dep (rtx producer, rtx consumer)
23642 return !arm_no_early_store_addr_dep (producer, consumer);
23645 /* Return nonzero if the CONSUMER instruction (a load) does need
23646 PRODUCER's value to calculate the address. */
23649 arm_early_load_addr_dep (rtx producer, rtx consumer)
23651 rtx value = PATTERN (producer);
23652 rtx addr = PATTERN (consumer);
23654 if (GET_CODE (value) == COND_EXEC)
23655 value = COND_EXEC_CODE (value);
23656 if (GET_CODE (value) == PARALLEL)
23657 value = XVECEXP (value, 0, 0);
23658 value = XEXP (value, 0);
23659 if (GET_CODE (addr) == COND_EXEC)
23660 addr = COND_EXEC_CODE (addr);
23661 if (GET_CODE (addr) == PARALLEL)
23662 addr = XVECEXP (addr, 0, 0);
23663 addr = XEXP (addr, 1);
23665 return reg_overlap_mentioned_p (value, addr);
23668 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23669 have an early register shift value or amount dependency on the
23670 result of PRODUCER. */
23673 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23675 rtx value = PATTERN (producer);
23676 rtx op = PATTERN (consumer);
23677 rtx early_op;
23679 if (GET_CODE (value) == COND_EXEC)
23680 value = COND_EXEC_CODE (value);
23681 if (GET_CODE (value) == PARALLEL)
23682 value = XVECEXP (value, 0, 0);
23683 value = XEXP (value, 0);
23684 if (GET_CODE (op) == COND_EXEC)
23685 op = COND_EXEC_CODE (op);
23686 if (GET_CODE (op) == PARALLEL)
23687 op = XVECEXP (op, 0, 0);
23688 op = XEXP (op, 1);
23690 early_op = XEXP (op, 0);
23691 /* This is either an actual independent shift, or a shift applied to
23692 the first operand of another operation. We want the whole shift
23693 operation. */
23694 if (GET_CODE (early_op) == REG)
23695 early_op = op;
23697 return !reg_overlap_mentioned_p (value, early_op);
23700 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23701 have an early register shift value dependency on the result of
23702 PRODUCER. */
23705 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23707 rtx value = PATTERN (producer);
23708 rtx op = PATTERN (consumer);
23709 rtx early_op;
23711 if (GET_CODE (value) == COND_EXEC)
23712 value = COND_EXEC_CODE (value);
23713 if (GET_CODE (value) == PARALLEL)
23714 value = XVECEXP (value, 0, 0);
23715 value = XEXP (value, 0);
23716 if (GET_CODE (op) == COND_EXEC)
23717 op = COND_EXEC_CODE (op);
23718 if (GET_CODE (op) == PARALLEL)
23719 op = XVECEXP (op, 0, 0);
23720 op = XEXP (op, 1);
23722 early_op = XEXP (op, 0);
23724 /* This is either an actual independent shift, or a shift applied to
23725 the first operand of another operation. We want the value being
23726 shifted, in either case. */
23727 if (GET_CODE (early_op) != REG)
23728 early_op = XEXP (early_op, 0);
23730 return !reg_overlap_mentioned_p (value, early_op);
23733 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23734 have an early register mult dependency on the result of
23735 PRODUCER. */
23738 arm_no_early_mul_dep (rtx producer, rtx consumer)
23740 rtx value = PATTERN (producer);
23741 rtx op = PATTERN (consumer);
23743 if (GET_CODE (value) == COND_EXEC)
23744 value = COND_EXEC_CODE (value);
23745 if (GET_CODE (value) == PARALLEL)
23746 value = XVECEXP (value, 0, 0);
23747 value = XEXP (value, 0);
23748 if (GET_CODE (op) == COND_EXEC)
23749 op = COND_EXEC_CODE (op);
23750 if (GET_CODE (op) == PARALLEL)
23751 op = XVECEXP (op, 0, 0);
23752 op = XEXP (op, 1);
23754 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23756 if (GET_CODE (XEXP (op, 0)) == MULT)
23757 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23758 else
23759 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23762 return 0;
23765 /* We can't rely on the caller doing the proper promotion when
23766 using APCS or ATPCS. */
23768 static bool
23769 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23771 return !TARGET_AAPCS_BASED;
23774 static enum machine_mode
23775 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23776 enum machine_mode mode,
23777 int *punsignedp ATTRIBUTE_UNUSED,
23778 const_tree fntype ATTRIBUTE_UNUSED,
23779 int for_return ATTRIBUTE_UNUSED)
23781 if (GET_MODE_CLASS (mode) == MODE_INT
23782 && GET_MODE_SIZE (mode) < 4)
23783 return SImode;
23785 return mode;
23788 /* AAPCS based ABIs use short enums by default. */
23790 static bool
23791 arm_default_short_enums (void)
23793 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23797 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23799 static bool
23800 arm_align_anon_bitfield (void)
23802 return TARGET_AAPCS_BASED;
23806 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23808 static tree
23809 arm_cxx_guard_type (void)
23811 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23814 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23815 has an accumulator dependency on the result of the producer (a
23816 multiplication instruction) and no other dependency on that result. */
23818 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23820 rtx mul = PATTERN (producer);
23821 rtx mac = PATTERN (consumer);
23822 rtx mul_result;
23823 rtx mac_op0, mac_op1, mac_acc;
23825 if (GET_CODE (mul) == COND_EXEC)
23826 mul = COND_EXEC_CODE (mul);
23827 if (GET_CODE (mac) == COND_EXEC)
23828 mac = COND_EXEC_CODE (mac);
23830 /* Check that mul is of the form (set (...) (mult ...))
23831 and mla is of the form (set (...) (plus (mult ...) (...))). */
23832 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23833 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23834 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23835 return 0;
23837 mul_result = XEXP (mul, 0);
23838 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23839 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23840 mac_acc = XEXP (XEXP (mac, 1), 1);
23842 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23843 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23844 && !reg_overlap_mentioned_p (mul_result, mac_op1));
23848 /* The EABI says test the least significant bit of a guard variable. */
23850 static bool
23851 arm_cxx_guard_mask_bit (void)
23853 return TARGET_AAPCS_BASED;
23857 /* The EABI specifies that all array cookies are 8 bytes long. */
23859 static tree
23860 arm_get_cookie_size (tree type)
23862 tree size;
23864 if (!TARGET_AAPCS_BASED)
23865 return default_cxx_get_cookie_size (type);
23867 size = build_int_cst (sizetype, 8);
23868 return size;
23872 /* The EABI says that array cookies should also contain the element size. */
23874 static bool
23875 arm_cookie_has_size (void)
23877 return TARGET_AAPCS_BASED;
23881 /* The EABI says constructors and destructors should return a pointer to
23882 the object constructed/destroyed. */
23884 static bool
23885 arm_cxx_cdtor_returns_this (void)
23887 return TARGET_AAPCS_BASED;
23890 /* The EABI says that an inline function may never be the key
23891 method. */
23893 static bool
23894 arm_cxx_key_method_may_be_inline (void)
23896 return !TARGET_AAPCS_BASED;
23899 static void
23900 arm_cxx_determine_class_data_visibility (tree decl)
23902 if (!TARGET_AAPCS_BASED
23903 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23904 return;
23906 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23907 is exported. However, on systems without dynamic vague linkage,
23908 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23909 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23910 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23911 else
23912 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23913 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23916 static bool
23917 arm_cxx_class_data_always_comdat (void)
23919 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23920 vague linkage if the class has no key function. */
23921 return !TARGET_AAPCS_BASED;
23925 /* The EABI says __aeabi_atexit should be used to register static
23926 destructors. */
23928 static bool
23929 arm_cxx_use_aeabi_atexit (void)
23931 return TARGET_AAPCS_BASED;
23935 void
23936 arm_set_return_address (rtx source, rtx scratch)
23938 arm_stack_offsets *offsets;
23939 HOST_WIDE_INT delta;
23940 rtx addr;
23941 unsigned long saved_regs;
23943 offsets = arm_get_frame_offsets ();
23944 saved_regs = offsets->saved_regs_mask;
23946 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23947 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23948 else
23950 if (frame_pointer_needed)
23951 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
23952 else
23954 /* LR will be the first saved register. */
23955 delta = offsets->outgoing_args - (offsets->frame + 4);
23958 if (delta >= 4096)
23960 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23961 GEN_INT (delta & ~4095)));
23962 addr = scratch;
23963 delta &= 4095;
23965 else
23966 addr = stack_pointer_rtx;
23968 addr = plus_constant (Pmode, addr, delta);
23970 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23975 void
23976 thumb_set_return_address (rtx source, rtx scratch)
23978 arm_stack_offsets *offsets;
23979 HOST_WIDE_INT delta;
23980 HOST_WIDE_INT limit;
23981 int reg;
23982 rtx addr;
23983 unsigned long mask;
23985 emit_use (source);
23987 offsets = arm_get_frame_offsets ();
23988 mask = offsets->saved_regs_mask;
23989 if (mask & (1 << LR_REGNUM))
23991 limit = 1024;
23992 /* Find the saved regs. */
23993 if (frame_pointer_needed)
23995 delta = offsets->soft_frame - offsets->saved_args;
23996 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23997 if (TARGET_THUMB1)
23998 limit = 128;
24000 else
24002 delta = offsets->outgoing_args - offsets->saved_args;
24003 reg = SP_REGNUM;
24005 /* Allow for the stack frame. */
24006 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24007 delta -= 16;
24008 /* The link register is always the first saved register. */
24009 delta -= 4;
24011 /* Construct the address. */
24012 addr = gen_rtx_REG (SImode, reg);
24013 if (delta > limit)
24015 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24016 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24017 addr = scratch;
24019 else
24020 addr = plus_constant (Pmode, addr, delta);
24022 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24024 else
24025 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24028 /* Implements target hook vector_mode_supported_p. */
24029 bool
24030 arm_vector_mode_supported_p (enum machine_mode mode)
24032 /* Neon also supports V2SImode, etc. listed in the clause below. */
24033 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24034 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24035 return true;
24037 if ((TARGET_NEON || TARGET_IWMMXT)
24038 && ((mode == V2SImode)
24039 || (mode == V4HImode)
24040 || (mode == V8QImode)))
24041 return true;
24043 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24044 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24045 || mode == V2HAmode))
24046 return true;
24048 return false;
24051 /* Implements target hook array_mode_supported_p. */
24053 static bool
24054 arm_array_mode_supported_p (enum machine_mode mode,
24055 unsigned HOST_WIDE_INT nelems)
24057 if (TARGET_NEON
24058 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24059 && (nelems >= 2 && nelems <= 4))
24060 return true;
24062 return false;
24065 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24066 registers when autovectorizing for Neon, at least until multiple vector
24067 widths are supported properly by the middle-end. */
24069 static enum machine_mode
24070 arm_preferred_simd_mode (enum machine_mode mode)
24072 if (TARGET_NEON)
24073 switch (mode)
24075 case SFmode:
24076 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24077 case SImode:
24078 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24079 case HImode:
24080 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24081 case QImode:
24082 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24083 case DImode:
24084 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24085 return V2DImode;
24086 break;
24088 default:;
24091 if (TARGET_REALLY_IWMMXT)
24092 switch (mode)
24094 case SImode:
24095 return V2SImode;
24096 case HImode:
24097 return V4HImode;
24098 case QImode:
24099 return V8QImode;
24101 default:;
24104 return word_mode;
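/* For example, with NEON enabled and the default quadword preference,
   SFmode data vectorizes as V4SFmode; under -mvectorize-with-neon-double
   it becomes V2SFmode, and an iWMMXt-only target gets V2SImode for
   SImode.  */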
24107 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24109 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24110 using r0-r4 for function arguments, r7 for the stack frame and don't have
24111 enough left over to do doubleword arithmetic. For Thumb-2 all the
24112 potentially problematic instructions accept high registers so this is not
24113 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24114 that require many low registers. */
24115 static bool
24116 arm_class_likely_spilled_p (reg_class_t rclass)
24118 if ((TARGET_THUMB1 && rclass == LO_REGS)
24119 || rclass == CC_REG)
24120 return true;
24122 return false;
24125 /* Implements target hook small_register_classes_for_mode_p. */
24126 bool
24127 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24129 return TARGET_THUMB1;
24132 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24133 ARM insns and therefore guarantee that the shift count is modulo 256.
24134 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24135 guarantee no particular behavior for out-of-range counts. */
24137 static unsigned HOST_WIDE_INT
24138 arm_shift_truncation_mask (enum machine_mode mode)
24140 return mode == SImode ? 255 : 0;
24144 /* Map internal gcc register numbers to DWARF2 register numbers. */
24146 unsigned int
24147 arm_dbx_register_number (unsigned int regno)
24149 if (regno < 16)
24150 return regno;
24152 if (IS_VFP_REGNUM (regno))
24154 /* See comment in arm_dwarf_register_span. */
24155 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24156 return 64 + regno - FIRST_VFP_REGNUM;
24157 else
24158 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24161 if (IS_IWMMXT_GR_REGNUM (regno))
24162 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24164 if (IS_IWMMXT_REGNUM (regno))
24165 return 112 + regno - FIRST_IWMMXT_REGNUM;
24167 gcc_unreachable ();
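/* Illustrative mappings from the arithmetic above: r7 stays 7, s5
   (FIRST_VFP_REGNUM + 5) becomes 69 in the legacy 64-95 range, d20
   (FIRST_VFP_REGNUM + 40, outside the single-precision range) becomes
   256 + 20 = 276, and wR2 becomes 112 + 2 = 114.  */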
24170 /* DWARF models VFPv3 registers as 32 64-bit registers.
24171 GCC models them as 64 32-bit registers, so we need to describe this to
24172 the DWARF generation code. Other registers can use the default. */
24173 static rtx
24174 arm_dwarf_register_span (rtx rtl)
24176 unsigned regno;
24177 int nregs;
24178 int i;
24179 rtx p;
24181 regno = REGNO (rtl);
24182 if (!IS_VFP_REGNUM (regno))
24183 return NULL_RTX;
24185 /* XXX FIXME: The EABI defines two VFP register ranges:
24186 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24187 256-287: D0-D31
24188 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24189 corresponding D register. Until GDB supports this, we shall use the
24190 legacy encodings. We also use these encodings for D0-D15 for
24191 compatibility with older debuggers. */
24192 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24193 return NULL_RTX;
24195 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24196 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24197 regno = (regno - FIRST_VFP_REGNUM) / 2;
24198 for (i = 0; i < nregs; i++)
24199 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24201 return p;
24204 #if ARM_UNWIND_INFO
24205 /* Emit unwind directives for a store-multiple instruction or stack pointer
24206 push during alignment.
24207 These should only ever be generated by the function prologue code, so
24208 expect them to have a particular form. */
24210 static void
24211 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24213 int i;
24214 HOST_WIDE_INT offset;
24215 HOST_WIDE_INT nregs;
24216 int reg_size;
24217 unsigned reg;
24218 unsigned lastreg;
24219 rtx e;
24221 e = XVECEXP (p, 0, 0);
24222 if (GET_CODE (e) != SET)
24223 abort ();
24225 /* First insn will adjust the stack pointer. */
24226 if (GET_CODE (e) != SET
24227 || GET_CODE (XEXP (e, 0)) != REG
24228 || REGNO (XEXP (e, 0)) != SP_REGNUM
24229 || GET_CODE (XEXP (e, 1)) != PLUS)
24230 abort ();
24232 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24233 nregs = XVECLEN (p, 0) - 1;
24235 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24236 if (reg < 16)
24238 /* The function prologue may also push pc, but not annotate it as it is
24239 never restored. We turn this into a stack pointer adjustment. */
24240 if (nregs * 4 == offset - 4)
24242 fprintf (asm_out_file, "\t.pad #4\n");
24243 offset -= 4;
24245 reg_size = 4;
24246 fprintf (asm_out_file, "\t.save {");
24248 else if (IS_VFP_REGNUM (reg))
24250 reg_size = 8;
24251 fprintf (asm_out_file, "\t.vsave {");
24253 else
24254 /* Unknown register type. */
24255 abort ();
24257 /* If the stack increment doesn't match the size of the saved registers,
24258 something has gone horribly wrong. */
24259 if (offset != nregs * reg_size)
24260 abort ();
24262 offset = 0;
24263 lastreg = 0;
24264 /* The remaining insns will describe the stores. */
24265 for (i = 1; i <= nregs; i++)
24267 /* Expect (set (mem <addr>) (reg)).
24268 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24269 e = XVECEXP (p, 0, i);
24270 if (GET_CODE (e) != SET
24271 || GET_CODE (XEXP (e, 0)) != MEM
24272 || GET_CODE (XEXP (e, 1)) != REG)
24273 abort ();
24275 reg = REGNO (XEXP (e, 1));
24276 if (reg < lastreg)
24277 abort ();
24279 if (i != 1)
24280 fprintf (asm_out_file, ", ");
24281 /* We can't use %r for vfp because we need to use the
24282 double precision register names. */
24283 if (IS_VFP_REGNUM (reg))
24284 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24285 else
24286 asm_fprintf (asm_out_file, "%r", reg);
24288 #ifdef ENABLE_CHECKING
24289 /* Check that the addresses are consecutive. */
24290 e = XEXP (XEXP (e, 0), 0);
24291 if (GET_CODE (e) == PLUS)
24293 offset += reg_size;
24294 if (GET_CODE (XEXP (e, 0)) != REG
24295 || REGNO (XEXP (e, 0)) != SP_REGNUM
24296 || GET_CODE (XEXP (e, 1)) != CONST_INT
24297 || offset != INTVAL (XEXP (e, 1)))
24298 abort ();
24300 else if (i != 1
24301 || GET_CODE (e) != REG
24302 || REGNO (e) != SP_REGNUM)
24303 abort ();
24304 #endif
24306 fprintf (asm_out_file, "}\n");
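/* As an illustration, a prologue "push {r4, r5, lr}" (a 12-byte stack
   increment matching three 4-byte saves) is annotated as
   ".save {r4, r5, lr}", while "vpush {d8, d9}" yields ".vsave {d8, d9}".  */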
24309 /* Emit unwind directives for a SET. */
24311 static void
24312 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24314 rtx e0;
24315 rtx e1;
24316 unsigned reg;
24318 e0 = XEXP (p, 0);
24319 e1 = XEXP (p, 1);
24320 switch (GET_CODE (e0))
24322 case MEM:
24323 /* Pushing a single register. */
24324 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24325 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
24326 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24327 abort ();
24329 asm_fprintf (asm_out_file, "\t.save ");
24330 if (IS_VFP_REGNUM (REGNO (e1)))
24331 asm_fprintf(asm_out_file, "{d%d}\n",
24332 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24333 else
24334 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24335 break;
24337 case REG:
24338 if (REGNO (e0) == SP_REGNUM)
24340 /* A stack increment. */
24341 if (GET_CODE (e1) != PLUS
24342 || GET_CODE (XEXP (e1, 0)) != REG
24343 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24344 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24345 abort ();
24347 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24348 -INTVAL (XEXP (e1, 1)));
24350 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24352 HOST_WIDE_INT offset;
24354 if (GET_CODE (e1) == PLUS)
24356 if (GET_CODE (XEXP (e1, 0)) != REG
24357 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24358 abort ();
24359 reg = REGNO (XEXP (e1, 0));
24360 offset = INTVAL (XEXP (e1, 1));
24361 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24362 HARD_FRAME_POINTER_REGNUM, reg,
24363 offset);
24365 else if (GET_CODE (e1) == REG)
24367 reg = REGNO (e1);
24368 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24369 HARD_FRAME_POINTER_REGNUM, reg);
24371 else
24372 abort ();
24374 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
24376 /* Move from sp to reg. */
24377 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24379 else if (GET_CODE (e1) == PLUS
24380 && GET_CODE (XEXP (e1, 0)) == REG
24381 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24382 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
24384 /* Set reg to offset from sp. */
24385 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24386 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24388 else
24389 abort ();
24390 break;
24392 default:
24393 abort ();
24398 /* Emit unwind directives for the given insn. */
24400 static void
24401 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24403 rtx note, pat;
24404 bool handled_one = false;
24406 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24407 return;
24409 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24410 && (TREE_NOTHROW (current_function_decl)
24411 || crtl->all_throwers_are_sibcalls))
24412 return;
24414 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24415 return;
24417 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24419 pat = XEXP (note, 0);
24420 switch (REG_NOTE_KIND (note))
24422 case REG_FRAME_RELATED_EXPR:
24423 goto found;
24425 case REG_CFA_REGISTER:
24426 if (pat == NULL)
24428 pat = PATTERN (insn);
24429 if (GET_CODE (pat) == PARALLEL)
24430 pat = XVECEXP (pat, 0, 0);
24433 /* Only emitted for IS_STACKALIGN re-alignment. */
24435 rtx dest, src;
24436 unsigned reg;
24438 src = SET_SRC (pat);
24439 dest = SET_DEST (pat);
24441 gcc_assert (src == stack_pointer_rtx);
24442 reg = REGNO (dest);
24443 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24444 reg + 0x90, reg);
24446 handled_one = true;
24447 break;
24449 case REG_CFA_DEF_CFA:
24450 case REG_CFA_EXPRESSION:
24451 case REG_CFA_ADJUST_CFA:
24452 case REG_CFA_OFFSET:
24453 /* ??? Only handling here what we actually emit. */
24454 gcc_unreachable ();
24456 default:
24457 break;
24460 if (handled_one)
24461 return;
24462 pat = PATTERN (insn);
24463 found:
24465 switch (GET_CODE (pat))
24467 case SET:
24468 arm_unwind_emit_set (asm_out_file, pat);
24469 break;
24471 case SEQUENCE:
24472 /* Store multiple. */
24473 arm_unwind_emit_sequence (asm_out_file, pat);
24474 break;
24476 default:
24477 abort();
24482 /* Output a reference from a function exception table to the type_info
24483 object X. The EABI specifies that the symbol should be relocated by
24484 an R_ARM_TARGET2 relocation. */
24486 static bool
24487 arm_output_ttype (rtx x)
24489 fputs ("\t.word\t", asm_out_file);
24490 output_addr_const (asm_out_file, x);
24491 /* Use special relocations for symbol references. */
24492 if (GET_CODE (x) != CONST_INT)
24493 fputs ("(TARGET2)", asm_out_file);
24494 fputc ('\n', asm_out_file);
24496 return TRUE;
24499 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24501 static void
24502 arm_asm_emit_except_personality (rtx personality)
24504 fputs ("\t.personality\t", asm_out_file);
24505 output_addr_const (asm_out_file, personality);
24506 fputc ('\n', asm_out_file);
24509 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24511 static void
24512 arm_asm_init_sections (void)
24514 exception_section = get_unnamed_section (0, output_section_asm_op,
24515 "\t.handlerdata");
24517 #endif /* ARM_UNWIND_INFO */
24519 /* Output unwind directives for the start/end of a function. */
24521 void
24522 arm_output_fn_unwind (FILE * f, bool prologue)
24524 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24525 return;
24527 if (prologue)
24528 fputs ("\t.fnstart\n", f);
24529 else
24531 /* If this function will never be unwound, then mark it as such.
24532 The same condition is used in arm_unwind_emit to suppress
24533 the frame annotations. */
24534 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24535 && (TREE_NOTHROW (current_function_decl)
24536 || crtl->all_throwers_are_sibcalls))
24537 fputs("\t.cantunwind\n", f);
24539 fputs ("\t.fnend\n", f);
24543 static bool
24544 arm_emit_tls_decoration (FILE *fp, rtx x)
24546 enum tls_reloc reloc;
24547 rtx val;
24549 val = XVECEXP (x, 0, 0);
24550 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24552 output_addr_const (fp, val);
24554 switch (reloc)
24556 case TLS_GD32:
24557 fputs ("(tlsgd)", fp);
24558 break;
24559 case TLS_LDM32:
24560 fputs ("(tlsldm)", fp);
24561 break;
24562 case TLS_LDO32:
24563 fputs ("(tlsldo)", fp);
24564 break;
24565 case TLS_IE32:
24566 fputs ("(gottpoff)", fp);
24567 break;
24568 case TLS_LE32:
24569 fputs ("(tpoff)", fp);
24570 break;
24571 case TLS_DESCSEQ:
24572 fputs ("(tlsdesc)", fp);
24573 break;
24574 default:
24575 gcc_unreachable ();
24578 switch (reloc)
24580 case TLS_GD32:
24581 case TLS_LDM32:
24582 case TLS_IE32:
24583 case TLS_DESCSEQ:
24584 fputs (" + (. - ", fp);
24585 output_addr_const (fp, XVECEXP (x, 0, 2));
24586 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
24587 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24588 output_addr_const (fp, XVECEXP (x, 0, 3));
24589 fputc (')', fp);
24590 break;
24591 default:
24592 break;
24595 return TRUE;
24598 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24600 static void
24601 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24603 gcc_assert (size == 4);
24604 fputs ("\t.word\t", file);
24605 output_addr_const (file, x);
24606 fputs ("(tlsldo)", file);
24609 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24611 static bool
24612 arm_output_addr_const_extra (FILE *fp, rtx x)
24614 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24615 return arm_emit_tls_decoration (fp, x);
24616 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24618 char label[256];
24619 int labelno = INTVAL (XVECEXP (x, 0, 0));
24621 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24622 assemble_name_raw (fp, label);
24624 return TRUE;
24626 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24628 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24629 if (GOT_PCREL)
24630 fputs ("+.", fp);
24631 fputs ("-(", fp);
24632 output_addr_const (fp, XVECEXP (x, 0, 0));
24633 fputc (')', fp);
24634 return TRUE;
24636 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24638 output_addr_const (fp, XVECEXP (x, 0, 0));
24639 if (GOT_PCREL)
24640 fputs ("+.", fp);
24641 fputs ("-(", fp);
24642 output_addr_const (fp, XVECEXP (x, 0, 1));
24643 fputc (')', fp);
24644 return TRUE;
24646 else if (GET_CODE (x) == CONST_VECTOR)
24647 return arm_emit_vector_const (fp, x);
24649 return FALSE;
24652 /* Output assembly for a shift instruction.
24653 SET_FLAGS determines how the instruction modifies the condition codes.
24654 0 - Do not set condition codes.
24655 1 - Set condition codes.
24656 2 - Use smallest instruction. */
24657 const char *
24658 arm_output_shift(rtx * operands, int set_flags)
24660 char pattern[100];
24661 static const char flag_chars[3] = {'?', '.', '!'};
24662 const char *shift;
24663 HOST_WIDE_INT val;
24664 char c;
24666 c = flag_chars[set_flags];
24667 if (TARGET_UNIFIED_ASM)
24669 shift = shift_op(operands[3], &val);
24670 if (shift)
24672 if (val != -1)
24673 operands[2] = GEN_INT(val);
24674 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24676 else
24677 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24679 else
24680 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24681 output_asm_insn (pattern, operands);
24682 return "";
24685 /* Output assembly for a WMMX immediate shift instruction. */
24686 const char *
24687 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24689 int shift = INTVAL (operands[2]);
24690 char templ[50];
24691 enum machine_mode opmode = GET_MODE (operands[0]);
24693 gcc_assert (shift >= 0);
24695 /* If the shift value in the register versions is > 63 (for the D qualifier),
24696 31 (for the W qualifier) or 15 (for the H qualifier), the shift is out of range. */
24697 if (((opmode == V4HImode) && (shift > 15))
24698 || ((opmode == V2SImode) && (shift > 31))
24699 || ((opmode == DImode) && (shift > 63)))
24701 if (wror_or_wsra)
24703 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24704 output_asm_insn (templ, operands);
24705 if (opmode == DImode)
24707 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24708 output_asm_insn (templ, operands);
24711 else
24713 /* The destination register will contain all zeros. */
24714 sprintf (templ, "wzero\t%%0");
24715 output_asm_insn (templ, operands);
24717 return "";
24720 if ((opmode == DImode) && (shift > 32))
24722 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24723 output_asm_insn (templ, operands);
24724 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24725 output_asm_insn (templ, operands);
24727 else
24729 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24730 output_asm_insn (templ, operands);
24732 return "";
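/* Illustrative split (hypothetical wsrad by #40 on a DImode operand):
   since the shift is in range (<= 63) but greater than 32, it is emitted
   as "#32" applied to the source followed by "#8" applied to the
   result.  */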
24735 /* Output assembly for a WMMX tinsr instruction. */
24736 const char *
24737 arm_output_iwmmxt_tinsr (rtx *operands)
24739 int mask = INTVAL (operands[3]);
24740 int i;
24741 char templ[50];
24742 int units = mode_nunits[GET_MODE (operands[0])];
24743 gcc_assert ((mask & (mask - 1)) == 0);
24744 for (i = 0; i < units; ++i)
24746 if ((mask & 0x01) == 1)
24748 break;
24750 mask >>= 1;
24752 gcc_assert (i < units);
24754 switch (GET_MODE (operands[0]))
24756 case V8QImode:
24757 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24758 break;
24759 case V4HImode:
24760 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24761 break;
24762 case V2SImode:
24763 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24764 break;
24765 default:
24766 gcc_unreachable ();
24767 break;
24769 output_asm_insn (templ, operands);
24771 return "";
24774 /* Output a Thumb-1 casesi dispatch sequence. */
24775 const char *
24776 thumb1_output_casesi (rtx *operands)
24778 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24780 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24782 switch (GET_MODE(diff_vec))
24784 case QImode:
24785 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24786 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24787 case HImode:
24788 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24789 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24790 case SImode:
24791 return "bl\t%___gnu_thumb1_case_si";
24792 default:
24793 gcc_unreachable ();
24797 /* Output a Thumb-2 casesi instruction. */
24798 const char *
24799 thumb2_output_casesi (rtx *operands)
24801 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24803 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24805 output_asm_insn ("cmp\t%0, %1", operands);
24806 output_asm_insn ("bhi\t%l3", operands);
24807 switch (GET_MODE(diff_vec))
24809 case QImode:
24810 return "tbb\t[%|pc, %0]";
24811 case HImode:
24812 return "tbh\t[%|pc, %0, lsl #1]";
24813 case SImode:
24814 if (flag_pic)
24816 output_asm_insn ("adr\t%4, %l2", operands);
24817 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24818 output_asm_insn ("add\t%4, %4, %5", operands);
24819 return "bx\t%4";
24821 else
24823 output_asm_insn ("adr\t%4, %l2", operands);
24824 return "ldr\t%|pc, [%4, %0, lsl #2]";
24826 default:
24827 gcc_unreachable ();
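/* Illustrative example (register names are placeholders): for an HImode
   dispatch table without -fpic, the code above emits roughly
       cmp   r0, r1            @ index against table bound
       bhi   .Ldefault         @ out of range -> default label (%l3)
       tbh   [pc, r0, lsl #1]  @ branch via halfword offset table  */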
24831 /* Most ARM cores are single issue, but some newer ones can issue more
24832 than one instruction per cycle.  The scheduler descriptions rely on
this being correct. */
24833 static int
24834 arm_issue_rate (void)
24836 switch (arm_tune)
24838 case cortexa15:
24839 return 3;
24841 case cortexr4:
24842 case cortexr4f:
24843 case cortexr5:
24844 case genericv7a:
24845 case cortexa5:
24846 case cortexa8:
24847 case cortexa9:
24848 case fa726te:
24849 return 2;
24851 default:
24852 return 1;
24856 /* A table and a function to perform ARM-specific name mangling for
24857 NEON vector types in order to conform to the AAPCS (see "Procedure
24858 Call Standard for the ARM Architecture", Appendix A). To qualify
24859 for emission with the mangled names defined in that document, a
24860 vector type must not only be of the correct mode but also be
24861 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24862 typedef struct
24864 enum machine_mode mode;
24865 const char *element_type_name;
24866 const char *aapcs_name;
24867 } arm_mangle_map_entry;
24869 static arm_mangle_map_entry arm_mangle_map[] = {
24870 /* 64-bit containerized types. */
24871 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24872 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24873 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24874 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24875 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24876 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24877 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24878 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24879 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24880 /* 128-bit containerized types. */
24881 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24882 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24883 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24884 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24885 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24886 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24887 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24888 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24889 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24890 { VOIDmode, NULL, NULL }
24893 const char *
24894 arm_mangle_type (const_tree type)
24896 arm_mangle_map_entry *pos = arm_mangle_map;
24898 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24899 has to be mangled as if it is in the "std" namespace. */
24900 if (TARGET_AAPCS_BASED
24901 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24903 static bool warned;
24904 if (!warned && warn_psabi && !in_system_header)
24906 warned = true;
24907 inform (input_location,
24908 "the mangling of %<va_list%> has changed in GCC 4.4");
24910 return "St9__va_list";
24913 /* Half-precision float. */
24914 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24915 return "Dh";
24917 if (TREE_CODE (type) != VECTOR_TYPE)
24918 return NULL;
24920 /* Check the mode of the vector type, and the name of the vector
24921 element type, against the table. */
24922 while (pos->mode != VOIDmode)
24924 tree elt_type = TREE_TYPE (type);
24926 if (pos->mode == TYPE_MODE (type)
24927 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24928 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24929 pos->element_type_name))
24930 return pos->aapcs_name;
24932 pos++;
24935 /* Use the default mangling for unrecognized (possibly user-defined)
24936 vector types. */
24937 return NULL;
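/* Worked examples of the mangling above (illustrative, derived from the
   table and code in this function):
     - __fp16 (16-bit REAL_TYPE)                      -> "Dh"
     - V4SImode vector of __builtin_neon_si elements  -> "17__simd128_int32_t"
     - __va_list on an AAPCS-based target             -> "St9__va_list"  */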
24940 /* Order of allocation of core registers for Thumb: this allocation is
24941 written over the corresponding initial entries of the array
24942 initialized with REG_ALLOC_ORDER. We allocate all low registers
24943 first. Saving and restoring a low register is usually cheaper than
24944 using a call-clobbered high register. */
24946 static const int thumb_core_reg_alloc_order[] =
24948 3, 2, 1, 0, 4, 5, 6, 7,
24949 14, 12, 8, 9, 10, 11
24952 /* Adjust register allocation order when compiling for Thumb. */
24954 void
24955 arm_order_regs_for_local_alloc (void)
24957 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24958 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24959 if (TARGET_THUMB)
24960 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24961 sizeof (thumb_core_reg_alloc_order));
24964 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24966 bool
24967 arm_frame_pointer_required (void)
24969 return (cfun->has_nonlocal_label
24970 || SUBTARGET_FRAME_POINTER_REQUIRED
24971 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24974 /* Only Thumb-1 lacks support for conditional execution, so return true
24975 if the target is not Thumb-1. */
24976 static bool
24977 arm_have_conditional_execution (void)
24979 return !TARGET_THUMB1;
24982 static unsigned int
24983 arm_autovectorize_vector_sizes (void)
24985 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24988 static bool
24989 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24991 /* Vectors which aren't in packed structures will not be less aligned than
24992 the natural alignment of their element type, so this is safe. */
24993 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24994 return !is_packed;
24996 return default_builtin_vector_alignment_reachable (type, is_packed);
24999 static bool
25000 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25001 const_tree type, int misalignment,
25002 bool is_packed)
25004 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25006 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25008 if (is_packed)
25009 return align == 1;
25011 /* If the misalignment is unknown, we should be able to handle the access
25012 so long as it is not to a member of a packed data structure. */
25013 if (misalignment == -1)
25014 return true;
25016 /* Return true if the misalignment is a multiple of the natural alignment
25017 of the vector's element type. This is probably always going to be
25018 true in practice, since we've already established that this isn't a
25019 packed access. */
25020 return ((misalignment % align) == 0);
25023 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25024 is_packed);
25027 static void
25028 arm_conditional_register_usage (void)
25030 int regno;
25032 if (TARGET_THUMB1 && optimize_size)
25034 /* When optimizing for size on Thumb-1, it's better not
25035 to use the HI regs, because of the overhead of
25036 stacking them. */
25037 for (regno = FIRST_HI_REGNUM;
25038 regno <= LAST_HI_REGNUM; ++regno)
25039 fixed_regs[regno] = call_used_regs[regno] = 1;
25042 /* The link register can be clobbered by any branch insn,
25043 but we have no way to track that at present, so mark
25044 it as unavailable. */
25045 if (TARGET_THUMB1)
25046 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25048 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25050 /* VFPv3 registers are disabled when earlier VFP
25051 versions are selected due to the definition of
25052 LAST_VFP_REGNUM. */
25053 for (regno = FIRST_VFP_REGNUM;
25054 regno <= LAST_VFP_REGNUM; ++ regno)
25056 fixed_regs[regno] = 0;
25057 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25058 || regno >= FIRST_VFP_REGNUM + 32;
25062 if (TARGET_REALLY_IWMMXT)
25064 regno = FIRST_IWMMXT_GR_REGNUM;
25065 /* The 2002/10/09 revision of the XScale ABI has wCG0
25066 and wCG1 as call-preserved registers. The 2002/11/21
25067 revision changed this so that all wCG registers are
25068 scratch registers. */
25069 for (regno = FIRST_IWMMXT_GR_REGNUM;
25070 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25071 fixed_regs[regno] = 0;
25072 /* The XScale ABI has wR0 - wR9 as scratch registers,
25073 the rest as call-preserved registers. */
25074 for (regno = FIRST_IWMMXT_REGNUM;
25075 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25077 fixed_regs[regno] = 0;
25078 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25082 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25084 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25085 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25087 else if (TARGET_APCS_STACK)
25089 fixed_regs[10] = 1;
25090 call_used_regs[10] = 1;
25092 /* -mcaller-super-interworking reserves r11 for calls to
25093 _interwork_r11_call_via_rN(). Making the register global
25094 is an easy way of ensuring that it remains valid for all
25095 calls. */
25096 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25097 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25099 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25100 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25101 if (TARGET_CALLER_INTERWORKING)
25102 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25104 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25107 static reg_class_t
25108 arm_preferred_rename_class (reg_class_t rclass)
25110 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25111 using GENERAL_REGS.  During the register rename pass, we therefore
25112 prefer LO_REGS, which can reduce code size. */
25113 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25114 return LO_REGS;
25115 else
25116 return NO_REGS;
25119 /* Compute the attribute "length" of insn "*push_multi".
25120 So this function MUST be kept in sync with that insn pattern. */
25122 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
25124 int i, regno, hi_reg;
25125 int num_saves = XVECLEN (parallel_op, 0);
25127 /* ARM mode. */
25128 if (TARGET_ARM)
25129 return 4;
25130 /* Thumb1 mode. */
25131 if (TARGET_THUMB1)
25132 return 2;
25134 /* Thumb2 mode. */
25135 regno = REGNO (first_op);
25136 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25137 for (i = 1; i < num_saves && !hi_reg; i++)
25139 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25140 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25143 if (!hi_reg)
25144 return 2;
25145 return 4;
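/* Illustrative examples (hypothetical register sets): in Thumb-2,
   "push {r4-r7, lr}" uses only low registers and LR, so the length is
   2 bytes, while "push {r4, r8}" contains a high register other than LR,
   so the length is 4 bytes.  ARM mode is always 4 and Thumb-1 always 2.  */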
25148 /* Compute the number of instructions emitted by output_move_double. */
25150 arm_count_output_move_double_insns (rtx *operands)
25152 int count;
25153 rtx ops[2];
25154 /* output_move_double may modify the operands array, so call it
25155 here on a copy of the array. */
25156 ops[0] = operands[0];
25157 ops[1] = operands[1];
25158 output_move_double (ops, false, &count);
25159 return count;
25163 vfp3_const_double_for_fract_bits (rtx operand)
25165 REAL_VALUE_TYPE r0;
25167 if (GET_CODE (operand) != CONST_DOUBLE)
25168 return 0;
25170 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25171 if (exact_real_inverse (DFmode, &r0))
25173 if (exact_real_truncate (DFmode, &r0))
25175 HOST_WIDE_INT value = real_to_integer (&r0);
25176 value = value & 0xffffffff;
25177 if ((value != 0) && ( (value & (value - 1)) == 0))
25178 return int_log2 (value);
25181 return 0;
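/* Worked example (illustrative): for the constant 0.125 the exact real
   inverse is 8.0, which truncates exactly to the integer 8, a power of
   two, so the function returns int_log2 (8) == 3 fraction bits.  A
   constant such as 0.3 has no exact inverse and yields 0.  */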
25184 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25186 static void
25187 arm_pre_atomic_barrier (enum memmodel model)
25189 if (need_atomic_barrier_p (model, true))
25190 emit_insn (gen_memory_barrier ());
25193 static void
25194 arm_post_atomic_barrier (enum memmodel model)
25196 if (need_atomic_barrier_p (model, false))
25197 emit_insn (gen_memory_barrier ());
25200 /* Emit the load-exclusive and store-exclusive instructions. */
25202 static void
25203 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25205 rtx (*gen) (rtx, rtx);
25207 switch (mode)
25209 case QImode: gen = gen_arm_load_exclusiveqi; break;
25210 case HImode: gen = gen_arm_load_exclusivehi; break;
25211 case SImode: gen = gen_arm_load_exclusivesi; break;
25212 case DImode: gen = gen_arm_load_exclusivedi; break;
25213 default:
25214 gcc_unreachable ();
25217 emit_insn (gen (rval, mem));
25220 static void
25221 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25223 rtx (*gen) (rtx, rtx, rtx);
25225 switch (mode)
25227 case QImode: gen = gen_arm_store_exclusiveqi; break;
25228 case HImode: gen = gen_arm_store_exclusivehi; break;
25229 case SImode: gen = gen_arm_store_exclusivesi; break;
25230 case DImode: gen = gen_arm_store_exclusivedi; break;
25231 default:
25232 gcc_unreachable ();
25235 emit_insn (gen (bval, rval, mem));
25238 /* Mark the previous jump instruction as unlikely. */
25240 static void
25241 emit_unlikely_jump (rtx insn)
25243 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25245 insn = emit_jump_insn (insn);
25246 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25249 /* Expand a compare and swap pattern. */
25251 void
25252 arm_expand_compare_and_swap (rtx operands[])
25254 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25255 enum machine_mode mode;
25256 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25258 bval = operands[0];
25259 rval = operands[1];
25260 mem = operands[2];
25261 oldval = operands[3];
25262 newval = operands[4];
25263 is_weak = operands[5];
25264 mod_s = operands[6];
25265 mod_f = operands[7];
25266 mode = GET_MODE (mem);
25268 switch (mode)
25270 case QImode:
25271 case HImode:
25272 /* For narrow modes, we're going to perform the comparison in SImode,
25273 so do the zero-extension now. */
25274 rval = gen_reg_rtx (SImode);
25275 oldval = convert_modes (SImode, mode, oldval, true);
25276 /* FALLTHRU */
25278 case SImode:
25279 /* Force the value into a register if needed. We waited until after
25280 the zero-extension above to do this properly. */
25281 if (!arm_add_operand (oldval, mode))
25282 oldval = force_reg (mode, oldval);
25283 break;
25285 case DImode:
25286 if (!cmpdi_operand (oldval, mode))
25287 oldval = force_reg (mode, oldval);
25288 break;
25290 default:
25291 gcc_unreachable ();
25294 switch (mode)
25296 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25297 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25298 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25299 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25300 default:
25301 gcc_unreachable ();
25304 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25306 if (mode == QImode || mode == HImode)
25307 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25309 /* In all cases, we arrange for success to be signaled by Z set.
25310 This arrangement allows for the boolean result to be used directly
25311 in a subsequent branch, post optimization. */
25312 x = gen_rtx_REG (CCmode, CC_REGNUM);
25313 x = gen_rtx_EQ (SImode, x, const0_rtx);
25314 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25317 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25318 another memory store between the load-exclusive and store-exclusive can
25319 reset the monitor from Exclusive to Open state. This means we must wait
25320 until after reload to split the pattern, lest we get a register spill in
25321 the middle of the atomic sequence. */
25323 void
25324 arm_split_compare_and_swap (rtx operands[])
25326 rtx rval, mem, oldval, newval, scratch;
25327 enum machine_mode mode;
25328 enum memmodel mod_s, mod_f;
25329 bool is_weak;
25330 rtx label1, label2, x, cond;
25332 rval = operands[0];
25333 mem = operands[1];
25334 oldval = operands[2];
25335 newval = operands[3];
25336 is_weak = (operands[4] != const0_rtx);
25337 mod_s = (enum memmodel) INTVAL (operands[5]);
25338 mod_f = (enum memmodel) INTVAL (operands[6]);
25339 scratch = operands[7];
25340 mode = GET_MODE (mem);
25342 arm_pre_atomic_barrier (mod_s);
25344 label1 = NULL_RTX;
25345 if (!is_weak)
25347 label1 = gen_label_rtx ();
25348 emit_label (label1);
25350 label2 = gen_label_rtx ();
25352 arm_emit_load_exclusive (mode, rval, mem);
25354 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25355 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25356 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25357 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25358 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25360 arm_emit_store_exclusive (mode, scratch, mem, newval);
25362 /* Weak or strong, we want EQ to be true for success, so that we
25363 match the flags that we got from the compare above. */
25364 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25365 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25366 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25368 if (!is_weak)
25370 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25371 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25372 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25373 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25376 if (mod_f != MEMMODEL_RELAXED)
25377 emit_label (label2);
25379 arm_post_atomic_barrier (mod_s);
25381 if (mod_f == MEMMODEL_RELAXED)
25382 emit_label (label2);
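/* Illustrative expansion (register numbers are placeholders): a strong
   SImode compare-and-swap splits into roughly
       1:  ldrex   r0, [r1]        @ rval = *mem
           cmp     r0, r2          @ compare against oldval
           bne     2f              @ mismatch -> failure label
           strex   r3, r4, [r1]    @ try to store newval
           cmp     r3, #0          @ EQ <=> store-exclusive succeeded
           bne     1b              @ strong CAS: retry on contention
       2:
   with barriers inserted around the loop as the memory model requires.  */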
25385 void
25386 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25387 rtx value, rtx model_rtx, rtx cond)
25389 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25390 enum machine_mode mode = GET_MODE (mem);
25391 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25392 rtx label, x;
25394 arm_pre_atomic_barrier (model);
25396 label = gen_label_rtx ();
25397 emit_label (label);
25399 if (new_out)
25400 new_out = gen_lowpart (wmode, new_out);
25401 if (old_out)
25402 old_out = gen_lowpart (wmode, old_out);
25403 else
25404 old_out = new_out;
25405 value = simplify_gen_subreg (wmode, value, mode, 0);
25407 arm_emit_load_exclusive (mode, old_out, mem);
25409 switch (code)
25411 case SET:
25412 new_out = value;
25413 break;
25415 case NOT:
25416 x = gen_rtx_AND (wmode, old_out, value);
25417 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25418 x = gen_rtx_NOT (wmode, new_out);
25419 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25420 break;
25422 case MINUS:
25423 if (CONST_INT_P (value))
25425 value = GEN_INT (-INTVAL (value));
25426 code = PLUS;
25428 /* FALLTHRU */
25430 case PLUS:
25431 if (mode == DImode)
25433 /* DImode plus/minus need to clobber flags. */
25434 /* The adddi3 and subdi3 patterns are incorrectly written so that
25435 they require matching operands, even when we could easily support
25436 three operands. Thankfully, this can be fixed up post-splitting,
25437 as the individual add+adc patterns do accept three operands and
25438 post-reload cprop can make these moves go away. */
25439 emit_move_insn (new_out, old_out);
25440 if (code == PLUS)
25441 x = gen_adddi3 (new_out, new_out, value);
25442 else
25443 x = gen_subdi3 (new_out, new_out, value);
25444 emit_insn (x);
25445 break;
25447 /* FALLTHRU */
25449 default:
25450 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25451 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25452 break;
25455 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25457 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25458 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25460 arm_post_atomic_barrier (model);
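/* Illustrative expansion (register numbers are placeholders): an SImode
   atomic add splits into roughly
       1:  ldrex   r0, [r2]        @ old_out = *mem
           add     r1, r0, r3      @ new_out = old_out + value
           strex   ip, r1, [r2]    @ attempt the store
           cmp     ip, #0
           bne     1b              @ retry until the store-exclusive succeeds
   again wrapped in the barriers demanded by the memory model.  */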
25463 #define MAX_VECT_LEN 16
25465 struct expand_vec_perm_d
25467 rtx target, op0, op1;
25468 unsigned char perm[MAX_VECT_LEN];
25469 enum machine_mode vmode;
25470 unsigned char nelt;
25471 bool one_vector_p;
25472 bool testing_p;
25475 /* Generate a variable permutation. */
25477 static void
25478 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25480 enum machine_mode vmode = GET_MODE (target);
25481 bool one_vector_p = rtx_equal_p (op0, op1);
25483 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25484 gcc_checking_assert (GET_MODE (op0) == vmode);
25485 gcc_checking_assert (GET_MODE (op1) == vmode);
25486 gcc_checking_assert (GET_MODE (sel) == vmode);
25487 gcc_checking_assert (TARGET_NEON);
25489 if (one_vector_p)
25491 if (vmode == V8QImode)
25492 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25493 else
25494 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25496 else
25498 rtx pair;
25500 if (vmode == V8QImode)
25502 pair = gen_reg_rtx (V16QImode);
25503 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25504 pair = gen_lowpart (TImode, pair);
25505 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25507 else
25509 pair = gen_reg_rtx (OImode);
25510 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25511 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25516 void
25517 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25519 enum machine_mode vmode = GET_MODE (target);
25520 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25521 bool one_vector_p = rtx_equal_p (op0, op1);
25522 rtx rmask[MAX_VECT_LEN], mask;
25524 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25525 numbering of elements for big-endian, we must reverse the order. */
25526 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25528 /* The VTBL instruction does not use a modulo index, so we must take care
25529 of that ourselves. */
25530 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25531 for (i = 0; i < nelt; ++i)
25532 rmask[i] = mask;
25533 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25534 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25536 arm_expand_vec_perm_1 (target, op0, op1, sel);
25539 /* Generate or test for an insn that supports a constant permutation. */
25541 /* Recognize patterns for the VUZP insns. */
25543 static bool
25544 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25546 unsigned int i, odd, mask, nelt = d->nelt;
25547 rtx out0, out1, in0, in1, x;
25548 rtx (*gen)(rtx, rtx, rtx, rtx);
25550 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25551 return false;
25553 /* Note that these are little-endian tests. Adjust for big-endian later. */
25554 if (d->perm[0] == 0)
25555 odd = 0;
25556 else if (d->perm[0] == 1)
25557 odd = 1;
25558 else
25559 return false;
25560 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25562 for (i = 0; i < nelt; i++)
25564 unsigned elt = (i * 2 + odd) & mask;
25565 if (d->perm[i] != elt)
25566 return false;
25569 /* Success! */
25570 if (d->testing_p)
25571 return true;
25573 switch (d->vmode)
25575 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25576 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25577 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25578 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25579 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25580 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25581 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25582 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25583 default:
25584 gcc_unreachable ();
25587 in0 = d->op0;
25588 in1 = d->op1;
25589 if (BYTES_BIG_ENDIAN)
25591 x = in0, in0 = in1, in1 = x;
25592 odd = !odd;
25595 out0 = d->target;
25596 out1 = gen_reg_rtx (d->vmode);
25597 if (odd)
25598 x = out0, out0 = out1, out1 = x;
25600 emit_insn (gen (out0, in0, in1, out1));
25601 return true;
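/* Illustrative example: with two V4SImode operands, the even-element
   selector {0, 2, 4, 6} (or the odd one {1, 3, 5, 7}) satisfies the test
   above and is emitted as a single VUZP, with the unwanted half of the
   result written to a scratch register.  */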
25604 /* Recognize patterns for the VZIP insns. */
25606 static bool
25607 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25609 unsigned int i, high, mask, nelt = d->nelt;
25610 rtx out0, out1, in0, in1, x;
25611 rtx (*gen)(rtx, rtx, rtx, rtx);
25613 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25614 return false;
25616 /* Note that these are little-endian tests. Adjust for big-endian later. */
25617 high = nelt / 2;
25618 if (d->perm[0] == high)
25620 else if (d->perm[0] == 0)
25621 high = 0;
25622 else
25623 return false;
25624 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25626 for (i = 0; i < nelt / 2; i++)
25628 unsigned elt = (i + high) & mask;
25629 if (d->perm[i * 2] != elt)
25630 return false;
25631 elt = (elt + nelt) & mask;
25632 if (d->perm[i * 2 + 1] != elt)
25633 return false;
25636 /* Success! */
25637 if (d->testing_p)
25638 return true;
25640 switch (d->vmode)
25642 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25643 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25644 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25645 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25646 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25647 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25648 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25649 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25650 default:
25651 gcc_unreachable ();
25654 in0 = d->op0;
25655 in1 = d->op1;
25656 if (BYTES_BIG_ENDIAN)
25658 x = in0, in0 = in1, in1 = x;
25659 high = !high;
25662 out0 = d->target;
25663 out1 = gen_reg_rtx (d->vmode);
25664 if (high)
25665 x = out0, out0 = out1, out1 = x;
25667 emit_insn (gen (out0, in0, in1, out1));
25668 return true;
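/* Illustrative example: with two V4SImode operands, the selector
   {0, 4, 1, 5} interleaves the low halves and {2, 6, 3, 7} the high
   halves; both satisfy the test above and map onto a single VZIP.  */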
25671 /* Recognize patterns for the VREV insns. */
25673 static bool
25674 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25676 unsigned int i, j, diff, nelt = d->nelt;
25677 rtx (*gen)(rtx, rtx, rtx);
25679 if (!d->one_vector_p)
25680 return false;
25682 diff = d->perm[0];
25683 switch (diff)
25685 case 7:
25686 switch (d->vmode)
25688 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25689 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25690 default:
25691 return false;
25693 break;
25694 case 3:
25695 switch (d->vmode)
25697 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25698 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25699 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25700 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25701 default:
25702 return false;
25704 break;
25705 case 1:
25706 switch (d->vmode)
25708 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25709 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25710 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25711 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25712 case V4SImode: gen = gen_neon_vrev64v4si; break;
25713 case V2SImode: gen = gen_neon_vrev64v2si; break;
25714 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25715 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25716 default:
25717 return false;
25719 break;
25720 default:
25721 return false;
25724 for (i = 0; i < nelt ; i += diff + 1)
25725 for (j = 0; j <= diff; j += 1)
25727 /* This is guaranteed to be true as the value of diff
25728 is 7, 3, 1 and we should have enough elements in the
25729 queue to generate this. Getting a vector mask with a
25730 value of diff other than these values implies that
25731 something is wrong by the time we get here. */
25732 gcc_assert (i + j < nelt);
25733 if (d->perm[i + j] != i + diff - j)
25734 return false;
25737 /* Success! */
25738 if (d->testing_p)
25739 return true;
25741 /* ??? The third operand is an artifact of the builtin infrastructure
25742 and is ignored by the actual instruction. */
25743 emit_insn (gen (d->target, d->op0, const0_rtx));
25744 return true;
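/* Illustrative example: for V8QImode the selector {1, 0, 3, 2, 5, 4, 7, 6}
   has diff == 1 and becomes a VREV16, while {3, 2, 1, 0, 7, 6, 5, 4} has
   diff == 3 and becomes a VREV32.  */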
25747 /* Recognize patterns for the VTRN insns. */
25749 static bool
25750 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25752 unsigned int i, odd, mask, nelt = d->nelt;
25753 rtx out0, out1, in0, in1, x;
25754 rtx (*gen)(rtx, rtx, rtx, rtx);
25756 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25757 return false;
25759 /* Note that these are little-endian tests. Adjust for big-endian later. */
25760 if (d->perm[0] == 0)
25761 odd = 0;
25762 else if (d->perm[0] == 1)
25763 odd = 1;
25764 else
25765 return false;
25766 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25768 for (i = 0; i < nelt; i += 2)
25770 if (d->perm[i] != i + odd)
25771 return false;
25772 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25773 return false;
25776 /* Success! */
25777 if (d->testing_p)
25778 return true;
25780 switch (d->vmode)
25782 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25783 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25784 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25785 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25786 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25787 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25788 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25789 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25790 default:
25791 gcc_unreachable ();
25794 in0 = d->op0;
25795 in1 = d->op1;
25796 if (BYTES_BIG_ENDIAN)
25798 x = in0, in0 = in1, in1 = x;
25799 odd = !odd;
25802 out0 = d->target;
25803 out1 = gen_reg_rtx (d->vmode);
25804 if (odd)
25805 x = out0, out0 = out1, out1 = x;
25807 emit_insn (gen (out0, in0, in1, out1));
25808 return true;
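/* Illustrative example: with two V4SImode operands, the selector
   {0, 4, 2, 6} (even lanes) or {1, 5, 3, 7} (odd lanes) satisfies the
   test above and is emitted as a single VTRN.  */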
25811 /* The NEON VTBL instruction is a fully variable permutation that's even
25812 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25813 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25814 can do slightly better by expanding this as a constant where we don't
25815 have to apply a mask. */
25817 static bool
25818 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25820 rtx rperm[MAX_VECT_LEN], sel;
25821 enum machine_mode vmode = d->vmode;
25822 unsigned int i, nelt = d->nelt;
25824 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25825 numbering of elements for big-endian, we must reverse the order. */
25826 if (BYTES_BIG_ENDIAN)
25827 return false;
25829 if (d->testing_p)
25830 return true;
25832 /* Generic code will try constant permutation twice. Once with the
25833 original mode and again with the elements lowered to QImode.
25834 So wait and don't do the selector expansion ourselves. */
25835 if (vmode != V8QImode && vmode != V16QImode)
25836 return false;
25838 for (i = 0; i < nelt; ++i)
25839 rperm[i] = GEN_INT (d->perm[i]);
25840 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25841 sel = force_reg (vmode, sel);
25843 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25844 return true;
25847 static bool
25848 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25850 /* The pattern matching functions above are written to look for a small
25851 number to begin the sequence (0, 1, N/2). If we begin with an index
25852 from the second operand, we can swap the operands. */
25853 if (d->perm[0] >= d->nelt)
25855 unsigned i, nelt = d->nelt;
25856 rtx x;
25858 for (i = 0; i < nelt; ++i)
25859 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25861 x = d->op0;
25862 d->op0 = d->op1;
25863 d->op1 = x;
25866 if (TARGET_NEON)
25868 if (arm_evpc_neon_vuzp (d))
25869 return true;
25870 if (arm_evpc_neon_vzip (d))
25871 return true;
25872 if (arm_evpc_neon_vrev (d))
25873 return true;
25874 if (arm_evpc_neon_vtrn (d))
25875 return true;
25876 return arm_evpc_neon_vtbl (d);
25878 return false;
25881 /* Expand a vec_perm_const pattern. */
25883 bool
25884 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25886 struct expand_vec_perm_d d;
25887 int i, nelt, which;
25889 d.target = target;
25890 d.op0 = op0;
25891 d.op1 = op1;
25893 d.vmode = GET_MODE (target);
25894 gcc_assert (VECTOR_MODE_P (d.vmode));
25895 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25896 d.testing_p = false;
25898 for (i = which = 0; i < nelt; ++i)
25900 rtx e = XVECEXP (sel, 0, i);
25901 int ei = INTVAL (e) & (2 * nelt - 1);
25902 which |= (ei < nelt ? 1 : 2);
25903 d.perm[i] = ei;
25906 switch (which)
25908 default:
25909 gcc_unreachable();
25911 case 3:
25912 d.one_vector_p = false;
25913 if (!rtx_equal_p (op0, op1))
25914 break;
25916 /* The elements of PERM do not suggest that only the first operand
25917 is used, but both operands are identical. Allow easier matching
25918 of the permutation by folding the permutation into the single
25919 input vector. */
25920 /* FALLTHRU */
25921 case 2:
25922 for (i = 0; i < nelt; ++i)
25923 d.perm[i] &= nelt - 1;
25924 d.op0 = op1;
25925 d.one_vector_p = true;
25926 break;
25928 case 1:
25929 d.op1 = op0;
25930 d.one_vector_p = true;
25931 break;
25934 return arm_expand_vec_perm_const_1 (&d);
25937 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
25939 static bool
25940 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25941 const unsigned char *sel)
25943 struct expand_vec_perm_d d;
25944 unsigned int i, nelt, which;
25945 bool ret;
25947 d.vmode = vmode;
25948 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25949 d.testing_p = true;
25950 memcpy (d.perm, sel, nelt);
25952 /* Categorize the set of elements in the selector. */
25953 for (i = which = 0; i < nelt; ++i)
25955 unsigned char e = d.perm[i];
25956 gcc_assert (e < 2 * nelt);
25957 which |= (e < nelt ? 1 : 2);
25960 /* For all elements from second vector, fold the elements to first. */
25961 if (which == 2)
25962 for (i = 0; i < nelt; ++i)
25963 d.perm[i] -= nelt;
25965 /* Check whether the mask can be applied to the vector type. */
25966 d.one_vector_p = (which != 3);
25968 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25969 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25970 if (!d.one_vector_p)
25971 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25973 start_sequence ();
25974 ret = arm_expand_vec_perm_const_1 (&d);
25975 end_sequence ();
25977 return ret;
25980 bool
25981 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
25983 /* If we are soft float and either have ldrd or the mode is no wider
25984 than a word, then all auto increment forms are ok. */
25985 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
25986 return true;
25988 switch (code)
25990 /* Post increment and Pre Decrement are supported for all
25991 instruction forms except for vector forms. */
25992 case ARM_POST_INC:
25993 case ARM_PRE_DEC:
25994 if (VECTOR_MODE_P (mode))
25996 if (code != ARM_PRE_DEC)
25997 return true;
25998 else
25999 return false;
26002 return true;
26004 case ARM_POST_DEC:
26005 case ARM_PRE_INC:
26006 /* Without LDRD, and with a mode wider than a word, there is no
26007 point in auto-incrementing because ldm and stm will not have
26008 these forms. */
26009 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26010 return false;
26012 /* Vector and floating point modes do not support
26013 these auto increment forms. */
26014 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26015 return false;
26017 return true;
26019 default:
26020 return false;
26024 return false;
26027 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26028 on ARM, since we know that shifts by negative amounts are no-ops.
26029 Additionally, the default expansion code is not available or suitable
26030 for post-reload insn splits (this can occur when the register allocator
26031 chooses not to do a shift in NEON).
26033 This function is used in both initial expand and post-reload splits, and
26034 handles all kinds of 64-bit shifts.
26036 Input requirements:
26037 - It is safe for the input and output to be the same register, but
26038 early-clobber rules apply for the shift amount and scratch registers.
26039 - Shift by register requires both scratch registers. Shift by a constant
26040 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26041 the scratch registers may be NULL.
26042 - Ashiftrt by a register also clobbers the CC register. */
26043 void
26044 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26045 rtx amount, rtx scratch1, rtx scratch2)
26047 rtx out_high = gen_highpart (SImode, out);
26048 rtx out_low = gen_lowpart (SImode, out);
26049 rtx in_high = gen_highpart (SImode, in);
26050 rtx in_low = gen_lowpart (SImode, in);
26052 /* Terminology:
26053 in = the register pair containing the input value.
26054 out = the destination register pair.
26055 up = the high- or low-part of each pair.
26056 down = the opposite part to "up".
26057 In a shift, we can consider bits to shift from "up"-stream to
26058 "down"-stream, so in a left-shift "up" is the low-part and "down"
26059 is the high-part of each register pair. */
26061 rtx out_up = code == ASHIFT ? out_low : out_high;
26062 rtx out_down = code == ASHIFT ? out_high : out_low;
26063 rtx in_up = code == ASHIFT ? in_low : in_high;
26064 rtx in_down = code == ASHIFT ? in_high : in_low;
26066 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26067 gcc_assert (out
26068 && (REG_P (out) || GET_CODE (out) == SUBREG)
26069 && GET_MODE (out) == DImode);
26070 gcc_assert (in
26071 && (REG_P (in) || GET_CODE (in) == SUBREG)
26072 && GET_MODE (in) == DImode);
26073 gcc_assert (amount
26074 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26075 && GET_MODE (amount) == SImode)
26076 || CONST_INT_P (amount)));
26077 gcc_assert (scratch1 == NULL
26078 || (GET_CODE (scratch1) == SCRATCH)
26079 || (GET_MODE (scratch1) == SImode
26080 && REG_P (scratch1)));
26081 gcc_assert (scratch2 == NULL
26082 || (GET_CODE (scratch2) == SCRATCH)
26083 || (GET_MODE (scratch2) == SImode
26084 && REG_P (scratch2)));
26085 gcc_assert (!REG_P (out) || !REG_P (amount)
26086 || !HARD_REGISTER_P (out)
26087 || (REGNO (out) != REGNO (amount)
26088 && REGNO (out) + 1 != REGNO (amount)));
26090 /* Macros to make following code more readable. */
26091 #define SUB_32(DEST,SRC) \
26092 gen_addsi3 ((DEST), (SRC), gen_rtx_CONST_INT (VOIDmode, -32))
26093 #define RSB_32(DEST,SRC) \
26094 gen_subsi3 ((DEST), gen_rtx_CONST_INT (VOIDmode, 32), (SRC))
26095 #define SUB_S_32(DEST,SRC) \
26096 gen_addsi3_compare0 ((DEST), (SRC), \
26097 gen_rtx_CONST_INT (VOIDmode, -32))
26098 #define SET(DEST,SRC) \
26099 gen_rtx_SET (SImode, (DEST), (SRC))
26100 #define SHIFT(CODE,SRC,AMOUNT) \
26101 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26102 #define LSHIFT(CODE,SRC,AMOUNT) \
26103 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26104 SImode, (SRC), (AMOUNT))
26105 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26106 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26107 SImode, (SRC), (AMOUNT))
26108 #define ORR(A,B) \
26109 gen_rtx_IOR (SImode, (A), (B))
26110 #define BRANCH(COND,LABEL) \
26111 gen_arm_cond_branch ((LABEL), \
26112 gen_rtx_ ## COND (CCmode, cc_reg, \
26113 const0_rtx), \
26114 cc_reg)
26116 /* Shifts by register and shifts by constant are handled separately. */
26117 if (CONST_INT_P (amount))
26119 /* We have a shift-by-constant. */
26121 /* First, handle out-of-range shift amounts.
26122 In both cases we try to match the result an ARM instruction in a
26123 shift-by-register would give. This helps reduce execution
26124 differences between optimization levels, but it won't stop other
26125 parts of the compiler doing different things. This is "undefined
26126 behaviour", in any case. */
26127 if (INTVAL (amount) <= 0)
26128 emit_insn (gen_movdi (out, in));
26129 else if (INTVAL (amount) >= 64)
26131 if (code == ASHIFTRT)
26133 rtx const31_rtx = gen_rtx_CONST_INT (VOIDmode, 31);
26134 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26135 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26137 else
26138 emit_insn (gen_movdi (out, const0_rtx));
26141 /* Now handle valid shifts. */
26142 else if (INTVAL (amount) < 32)
26144 /* Shifts by a constant less than 32. */
26145 rtx reverse_amount = gen_rtx_CONST_INT (VOIDmode,
26146 32 - INTVAL (amount));
26148 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26149 emit_insn (SET (out_down,
26150 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26151 out_down)));
26152 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26154 else
26156 /* Shifts by a constant greater than 31. */
26157 rtx adj_amount = gen_rtx_CONST_INT (VOIDmode, INTVAL (amount) - 32);
26159 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26160 if (code == ASHIFTRT)
26161 emit_insn (gen_ashrsi3 (out_up, in_up,
26162 gen_rtx_CONST_INT (VOIDmode, 31)));
26163 else
26164 emit_insn (SET (out_up, const0_rtx));
26167 else
26169 /* We have a shift-by-register. */
26170 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26172 /* This alternative requires the scratch registers. */
26173 gcc_assert (scratch1 && REG_P (scratch1));
26174 gcc_assert (scratch2 && REG_P (scratch2));
26176 /* We will need the values "amount-32" and "32-amount" later.
26177 Swapping them around now allows the later code to be more general. */
26178 switch (code)
26180 case ASHIFT:
26181 emit_insn (SUB_32 (scratch1, amount));
26182 emit_insn (RSB_32 (scratch2, amount));
26183 break;
26184 case ASHIFTRT:
26185 emit_insn (RSB_32 (scratch1, amount));
26186 /* Also set CC = amount > 32. */
26187 emit_insn (SUB_S_32 (scratch2, amount));
26188 break;
26189 case LSHIFTRT:
26190 emit_insn (RSB_32 (scratch1, amount));
26191 emit_insn (SUB_32 (scratch2, amount));
26192 break;
26193 default:
26194 gcc_unreachable ();
26197 /* Emit code like this:
26199 arithmetic-left:
26200 out_down = in_down << amount;
26201 out_down = (in_up << (amount - 32)) | out_down;
26202 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26203 out_up = in_up << amount;
26205 arithmetic-right:
26206 out_down = in_down >> amount;
26207 out_down = (in_up << (32 - amount)) | out_down;
26208 if (amount < 32)
26209 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26210 out_up = in_up << amount;
26212 logical-right:
26213 out_down = in_down >> amount;
26214 out_down = (in_up << (32 - amount)) | out_down;
26215 if (amount < 32)
26216 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26217 out_up = in_up << amount;
26219 The ARM and Thumb2 variants are the same but implemented slightly
26220 differently. If this were only called during expand we could just
26221 use the Thumb2 case and let combine do the right thing, but this
26222 can also be called from post-reload splitters. */
26224 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26226 if (!TARGET_THUMB2)
26228 /* Emit code for ARM mode. */
26229 emit_insn (SET (out_down,
26230 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26231 if (code == ASHIFTRT)
26233 rtx done_label = gen_label_rtx ();
26234 emit_jump_insn (BRANCH (LT, done_label));
26235 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26236 out_down)));
26237 emit_label (done_label);
26239 else
26240 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26241 out_down)));
26243 else
26245 /* Emit code for Thumb2 mode.
26246 Thumb2 can't do shift and or in one insn. */
26247 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26248 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26250 if (code == ASHIFTRT)
26252 rtx done_label = gen_label_rtx ();
26253 emit_jump_insn (BRANCH (LT, done_label));
26254 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26255 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26256 emit_label (done_label);
26258 else
26260 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26261 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26265 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26268 #undef SUB_32
26269 #undef RSB_32
26270 #undef SUB_S_32
26271 #undef SET
26272 #undef SHIFT
26273 #undef LSHIFT
26274 #undef REV_LSHIFT
26275 #undef ORR
26276 #undef BRANCH
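/* Worked example (illustrative): for a DImode left shift by the constant
   10, the sequence emitted above is equivalent to
       out_high = (in_high << 10) | ((unsigned) in_low >> 22);
       out_low  = in_low << 10;
   and for a left shift by 40 (a constant of 32 or more) it degenerates to
       out_high = in_low << 8;
       out_low  = 0;  */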
26280 /* Returns true if COMPARISON is a valid comparison operation, putting
26281 the operands into a form that is valid for it. */
26282 bool
26283 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26285 enum rtx_code code = GET_CODE (*comparison);
26286 enum rtx_code canonical_code;
26287 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26288 ? GET_MODE (*op2) : GET_MODE (*op1);
26290 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26292 if (code == UNEQ || code == LTGT)
26293 return false;
26295 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26296 PUT_CODE (*comparison, canonical_code);
26298 switch (mode)
26300 case SImode:
26301 if (!arm_add_operand (*op1, mode))
26302 *op1 = force_reg (mode, *op1);
26303 if (!arm_add_operand (*op2, mode))
26304 *op2 = force_reg (mode, *op2);
26305 return true;
26307 case DImode:
26308 if (!cmpdi_operand (*op1, mode))
26309 *op1 = force_reg (mode, *op1);
26310 if (!cmpdi_operand (*op2, mode))
26311 *op2 = force_reg (mode, *op2);
26312 return true;
26314 case SFmode:
26315 case DFmode:
26316 if (!arm_float_compare_operand (*op1, mode))
26317 *op1 = force_reg (mode, *op1);
26318 if (!arm_float_compare_operand (*op2, mode))
26319 *op2 = force_reg (mode, *op2);
26320 return true;
26321 default:
26322 break;
26325 return false;
26329 #include "gt-arm.h"